Recut branch for Luke from new branch which was merged with trunk on
2012-06-19. Rename old branch for (temporary) reference. (part 2)
git-svn-id: https://svn.apache.org/repos/asf/incubator/tashi/branches/luke-zoni@1351890 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/INSTALL b/INSTALL
index 6240c35..127cc2b 100644
--- a/INSTALL
+++ b/INSTALL
@@ -124,6 +124,7 @@
given by the hostname command. If you plan on eventually having several
hosts and networks, feel free to add them now.
+root@grml:/usr/local/tashi# cd bin
root@grml:/usr/local/tashi/bin# DEBUG=1 ./clustermanager
2012-01-26 23:12:33,972 [./clustermanager:INFO] Using configuration file(s) ['/usr/local/tashi/etc/TashiDefaults.cfg']
2012-01-26 23:12:33,972 [./clustermanager:INFO] Starting cluster manager
@@ -152,15 +153,14 @@
In [1]: from tashi.rpycservices.rpyctypes import Host, HostState, Network
-In [2]: data.baseDataObject.hosts[1] = Host(d={'id':1,'name':'grml','state': HostState.Normal,'up':False})
+In [2]: data.baseDataObject.hosts[0] = Host(d={'id':0,'name':'grml','state': HostState.Normal,'up':False})
-In [3]: data.baseDataObject.networks[1]=Network(d={'id':0,'name':'default'})
+In [3]: data.baseDataObject.networks[0]=Network(d={'id':0,'name':'My Network'})
In [4]: data.baseDataObject.save()
-In [5]: import os
-
-In [6]: os.kill(os.getpid(), 9)
+In [5]: (^C)
+2012-03-07 20:00:00,456 [./bin/clustermanager:INFO] Exiting cluster manager after receiving a SIGINT signal
Run the cluster manager in the background:
root@grml:/usr/local/tashi/bin# ./clustermanager &
diff --git a/Makefile b/Makefile
index 57655a4..618050d 100644
--- a/Makefile
+++ b/Makefile
@@ -33,6 +33,7 @@
mkdir apache-tashi
cp -rp doc etc Makefile src DISCLAIMER INSTALL LICENSE NOTICE README apache-tashi/
find apache-tashi -type d -name ".svn"|xargs rm -rf
+ -chgrp -R incubator apache-tashi
tar zcf apache-tashi.tar.gz apache-tashi
rm -rf apache-tashi
@@ -117,7 +118,7 @@
# Zoni
bin/zoni-cli:
@echo Symlinking in zoni-cli...
- (cd bin; ln -s ../src/zoni/client/zoni-cli.py .)
+ (cd bin; ln -s ../src/zoni/client/zoni-cli.py zoni-client)
# why necessarily put this in /usr/local/bin like nothing else?
usr/local/bin/zoni:
@echo Creating /usr/local/bin/zoni
@@ -127,11 +128,11 @@
if test -e /usr/local/bin/zoni; then echo Removing zoni...; rm /usr/local/bin/zoni; fi
## for now only print warnings having to do with bad indentation. pylint doesn't make it easy to enable only 1,2 checks
-disabled_warnings=$(shell pylint --list-msgs|grep :W0| awk -F: '{ORS=","; if ($$2 != "W0311" && $$2 != "W0312"){ print $$2}}')
+disabled_warnings=$(shell pylint --list-msgs|grep :W0| awk -F: '{ORS=","; if ($$2 != "W0311" && $$2 != "W0312"){ print $$2}}')",F0401"
pysrc=$(shell find . \! -path '*gen-py*' \! -path '*services*' \! -path '*messagingthrift*' \! -name '__init__.py' -name "*.py")
tidy: $(addprefix tidyfile/,$(pysrc))
- @echo Insuring .py files are nice and tidy!
+ @echo Ensured .py files are nice and tidy!
tidyfile/%: %
@echo Checking tidy for $*
- pylint --report=no --disable-msg-cat=R,C,E --disable-msg=$(disabled_warnings) --indent-string="\t" $* 2> /dev/null;
+ pylint --report=no --disable=R,C,E --disable=$(disabled_warnings) --indent-string="\t" $* 2> /dev/null;
diff --git a/doc/DEVELOPMENT b/doc/DEVELOPMENT
index 32d1b20..ac6197c 100644
--- a/doc/DEVELOPMENT
+++ b/doc/DEVELOPMENT
@@ -8,3 +8,9 @@
Other ideas:
* Make available a console aggregator for user's VMs.
+
+Python caveats:
+ * We've liked to use variables like bin, id, sum, input, etc., but these
+ are built-in, and will be flagged by pydev
+ * pydev does not like python modules with a dash in the name
+
diff --git a/doc/INSTALL2 b/doc/INSTALL2
index 66ad7dc..34c437f 100644
--- a/doc/INSTALL2
+++ b/doc/INSTALL2
@@ -48,6 +48,16 @@
Note that the entire path of a network connection must be configured to
use jumbo frames, if the virtual machines are to use them.
+If you have large numbers of VLANs, and don't want to hardcode them into
+each VM host, you can find a sample qemu-ifup in the doc directory. This
+script will need to be adapted to your local standards by changing the
+basic parameters at the top. This script can then be linked to by the name
+Tashi expects it to have. For example, if you have a VLAN 1001, you will
+create a link from /etc/qemu-ifup.1001 to this script.
+
+The script will handle the creation of the VM interface, and creation of the
+bridge and VLANs if they haven't been created before.
+
---+ Accounting server
An accounting server is available in the distribution. It will log
diff --git a/doc/sample.qemu-ifup b/doc/sample.qemu-ifup
new file mode 100644
index 0000000..3323211
--- /dev/null
+++ b/doc/sample.qemu-ifup
@@ -0,0 +1,51 @@
+#!/bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# by Richard Gass and Michael Stroucken
+
+# Adapt the following two parameters to your installation
+# Uplink interface
+UPLINKIF="eth0"
+# Prefix for bridge naming
+BRIDGEPREFIX="br"
+
+vlanID=$(echo $0 | awk -F "ifup." '{print $2}')
+vmIf=$1
+
+# see if tagged interface exists
+bridgeUplinkIf="${UPLINKIF}.${vlanID}"
+cat /proc/net/vlan/config | grep "${bridgeUplinkIf} "
+if [ $? -gt 0 ];then
+ echo "creating tagged interface"
+ vconfig add ${UPLINKIF} ${vlanID}
+ ip link set ${bridgeUplinkIf} up
+fi
+
+# Check for the bridge
+bridgeName="${BRIDGEPREFIX}${vlanID}"
+brctl show | grep "^${bridgeName}"
+if [ $? -gt 0 ];then
+ echo "creating bridge interface"
+ brctl addbr ${bridgeName}
+ brctl addif ${bridgeName} ${bridgeUplinkIf}
+ ip link set ${bridgeName} up
+fi
+
+/sbin/ifconfig ${vmIf} 0.0.0.0 up
+/usr/sbin/brctl addif ${bridgeName} ${vmIf}
+exit 0
diff --git a/etc/NodeManager.cfg b/etc/NodeManager.cfg
index a47bccf..48f4044 100644
--- a/etc/NodeManager.cfg
+++ b/etc/NodeManager.cfg
@@ -80,7 +80,6 @@
statsInterval = 0.0
;accountingHost = clustermanager
;accountingPort = 2228
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
[Security]
authAndEncrypt = False
diff --git a/etc/TashiDefaults.cfg b/etc/TashiDefaults.cfg
index fd034eb..1472174 100644
--- a/etc/TashiDefaults.cfg
+++ b/etc/TashiDefaults.cfg
@@ -54,10 +54,10 @@
allowMismatchedVersions = False
maxMemory = 8192
maxCores = 8
+defaultNetwork = 0
allowDuplicateNames = False
;accountingHost = clustermanager
;accountingPort = 2228
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
[GetentOverride]
baseData = tashi.clustermanager.data.Pickled
@@ -110,11 +110,9 @@
clusterManagerHost = localhost
clusterManagerPort = 9882
statsInterval = 0.0
-;bind = 0.0.0.0 ; not supported (Thrift is missing support to specify what to bind to!)
[Qemu]
qemuBin = /usr/bin/kvm
-infoDir = /var/tmp/VmControlQemu/
pollDelay = 1.0
migrationRetries = 10
monitorTimeout = 60.0
@@ -124,6 +122,9 @@
statsInterval = 0.0
scratchDir = /tmp
scratchVg = vgscratch
+suspendHandler = gzip
+resumeHandler = zcat
+reservedMem = 512
[XenPV]
vmNamePrefix = tashi
diff --git a/src/tashi/accounting/accounting.py b/src/tashi/accounting/accounting.py
index 93d2999..698379a 100755
--- a/src/tashi/accounting/accounting.py
+++ b/src/tashi/accounting/accounting.py
@@ -17,8 +17,8 @@
# specific language governing permissions and limitations
# under the License.
+import os
import sys
-import signal
import logging.config
from tashi.rpycservices import rpycservices
@@ -26,13 +26,15 @@
#from rpyc.utils.authenticators import TlsliteVdbAuthenticator
#from tashi.rpycservices.rpyctypes import *
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean, debugConsole, signalHandler
+from tashi.util import createClient, instantiateImplementation, debugConsole
+from tashi.utils.config import Config
+
import tashi
class Accounting(object):
- def __init__(self, config, cmclient):
+ def __init__(self, config):
self.config = config
- self.cm = cmclient
+ self.cm = createClient(config)
self.hooks = []
self.log = logging.getLogger(__file__)
@@ -43,17 +45,20 @@
name = name.lower()
if (name.startswith("hook")):
try:
- self.hooks.append(instantiateImplementation(value, config, cmclient, False))
+ self.hooks.append(instantiateImplementation(value, self.config, self.cm, False))
except:
self.log.exception("Failed to load hook %s" % (value))
def initAccountingServer(self):
service = instantiateImplementation(self.config.get("Accounting", "service"), self.config)
+ #XXXstroucki: disabled authAndEncrypt for now
#if boolean(self.config.get("Security", "authAndEncrypt")):
if False:
pass
else:
+ # XXXstroucki: ThreadedServer is liable to have
+ # exceptions within if an endpoint is lost.
t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(self.config.get('AccountingService', 'port')), auto_register=False)
t.logger.setLevel(logging.ERROR)
@@ -62,25 +67,44 @@
debugConsole(globals())
- try:
- t.start()
- except KeyboardInterrupt:
- self.handleSIGTERM(signal.SIGTERM, None)
-
- @signalHandler(signal.SIGTERM)
- def handleSIGTERM(self, signalNumber, stackFrame):
- self.log.info('Exiting cluster manager after receiving a SIGINT signal')
+ t.start()
+ # shouldn't exit by itself
sys.exit(0)
def main():
- (config, configFiles) = getConfig(["Accounting"])
+ config = Config(["Accounting"])
+ configFiles = config.getFiles()
publisher = instantiateImplementation(config.get("Accounting", "publisher"), config)
tashi.publisher = publisher
- cmclient = createClient(config)
logging.config.fileConfig(configFiles)
- accounting = Accounting(config, cmclient)
+ log = logging.getLogger(__name__)
+ log.info('Using configuration file(s) %s' % configFiles)
- accounting.initAccountingServer()
+ accounting = Accounting(config)
+
+ # handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+ child = os.fork()
+
+ if child == 0:
+ accounting.initAccountingServer()
+ # shouldn't exit by itself
+ sys.exit(0)
+
+ else:
+ # main
+ try:
+ os.waitpid(child, 0)
+ except KeyboardInterrupt:
+ log.info("Exiting accounting service after receiving a SIGINT signal")
+ os._exit(0)
+ except Exception:
+ log.exception("Abnormal termination of accounting service")
+ os._exit(-1)
+
+ log.info("Exiting accounting service after service thread exited")
+ os._exit(-1)
+
+ return
if __name__ == "__main__":
main()
diff --git a/src/tashi/accounting/accountingservice.py b/src/tashi/accounting/accountingservice.py
index b1c035a..56c1c90 100644
--- a/src/tashi/accounting/accountingservice.py
+++ b/src/tashi/accounting/accountingservice.py
@@ -5,15 +5,15 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
-# under the License.
+# under the License.
import logging
import threading
@@ -22,43 +22,43 @@
from tashi import createClient
class AccountingService(object):
- """RPC service for the Accounting service"""
-
- def __init__(self, config):
- self.log = logging.getLogger(__name__)
- self.log.setLevel(logging.INFO)
+ """RPC service for the Accounting service"""
- self.config = config
+ def __init__(self, config):
+ self.log = logging.getLogger(__name__)
+ self.log.setLevel(logging.INFO)
- self.pollSleep = None
+ self.config = config
- # XXXstroucki new python has fallback values
- try:
- self.pollSleep = self.config.getint("AccountingService", "pollSleep")
- except:
- pass
+ self.pollSleep = None
- if self.pollSleep is None:
- self.pollSleep = 600
+ # XXXstroucki new python has fallback values
+ try:
+ self.pollSleep = self.config.getint("AccountingService", "pollSleep")
+ except:
+ pass
- self.cm = createClient(config)
- threading.Thread(target=self.__start).start()
+ if self.pollSleep is None:
+ self.pollSleep = 600
+
+ self.cm = createClient(config)
+ threading.Thread(target=self.__start).start()
# remote
- def record(self, strings):
- for string in strings:
- self.log.info("Remote: %s" % (string))
+ def record(self, strings):
+ for string in strings:
+ self.log.info("Remote: %s" % (string))
- def __start(self):
- while True:
- try:
- instances = self.cm.getInstances()
- for instance in instances:
- # XXXstroucki this currently duplicates what the CM was doing.
- self.log.info('Accounting: id %d host %d vmId %d user %d cores %d memory %d' % (instance.id, instance.hostId, instance.vmId, instance.userId, instance.cores, instance.memory))
- except:
- self.log.warning("Accounting iteration failed")
+ def __start(self):
+ while True:
+ try:
+ instances = self.cm.getInstances()
+ for instance in instances:
+ # XXXstroucki this currently duplicates what the CM was doing.
+ self.log.info('Accounting: id %s host %s vmId %s user %s cores %s memory %s' % (instance.id, instance.hostId, instance.vmId, instance.userId, instance.cores, instance.memory))
+ except:
+ self.log.warning("Accounting iteration failed")
-
- # wait to do the next iteration
- time.sleep(self.pollSleep)
+
+ # wait to do the next iteration
+ time.sleep(self.pollSleep)
diff --git a/src/tashi/agents/dhcpdns.py b/src/tashi/agents/dhcpdns.py
index a1741e1..9e95843 100644
--- a/src/tashi/agents/dhcpdns.py
+++ b/src/tashi/agents/dhcpdns.py
@@ -22,7 +22,7 @@
import subprocess
import time
from instancehook import InstanceHook
-from tashi.rpycservices.rpyctypes import Instance, NetworkConfiguration
+from tashi.rpycservices.rpyctypes import Instance
from tashi import boolean
class DhcpDns(InstanceHook):
@@ -55,15 +55,21 @@
self.ipMax = {}
self.currentIP = {}
self.usedIPs = {}
- for k in self.ipRange:
- ipRange = self.ipRange[k]
- (min, max) = ipRange.split("-")
- min = min.strip()
- max = max.strip()
- ipNum = self.strToIp(min)
- self.ipMin[k] = self.strToIp(min)
- self.ipMax[k] = self.strToIp(max)
- self.currentIP[k] = self.ipMin[k]
+
+ self.initIPs()
+
+ def initIPs(self):
+ self.usedIPs = {}
+ for network in self.ipRange:
+ ipRange = self.ipRange[network]
+ (ipMin, ipMax) = ipRange.split("-")
+ ipMin = ipMin.strip()
+ ipMax = ipMax.strip()
+ ipNum = self.strToIp(ipMin)
+ self.ipMin[network] = self.strToIp(ipMin)
+ self.ipMax[network] = self.strToIp(ipMax)
+ self.currentIP[network] = self.ipMin[network]
+
instances = self.client.getInstances()
for i in instances:
for nic in i.nics:
@@ -72,7 +78,7 @@
ipNum = self.strToIp(ip)
self.log.info('Added %s->%s during reinitialization' % (i.name, ip))
self.usedIPs[ipNum] = ip
- except Exception, e:
+ except Exception:
pass
def strToIp(self, s):
@@ -87,12 +93,17 @@
return "%d.%d.%d.%d" % ((ip>>24)&0xff, (ip>>16)&0xff, (ip>>8)&0xff, ip&0xff)
def allocateIP(self, nic):
+ # XXXstroucki: if the network is not defined having an ip
+ # range, this will throw a KeyError. Should be logged.
network = nic.network
allocatedIP = None
requestedIP = self.strToIp(nic.ip)
wrapToMinAlready = False
if (requestedIP <= self.ipMax[network] and requestedIP >= self.ipMin[network] and (requestedIP not in self.usedIPs)):
allocatedIP = requestedIP
+
+ # nic.ip will be updated later in preCreate if chosen
+ # ip not available
while (allocatedIP == None):
if (self.currentIP[network] > self.ipMax[network] and wrapToMinAlready):
raise UserWarning("No available IP addresses for network %d" % (network))
@@ -127,7 +138,7 @@
stdin.write("set hardware-type = 00:00:00:01\n") # Ethernet
stdin.write("create\n")
stdin.close()
- output = stdout.read()
+ __output = stdout.read()
stdout.close()
def removeDhcp(self, name, ipaddr=None):
@@ -146,7 +157,7 @@
stdin.write("open\n")
stdin.write("remove\n")
stdin.close()
- output = stdout.read()
+ __output = stdout.read()
stdout.close()
def addDns(self, name, ip):
@@ -169,15 +180,15 @@
stdin.write("update add %s %d IN PTR %s.%s.\n" % (reverseIpStr, self.dnsExpire, name, self.dnsDomain))
stdin.write("\n")
stdin.close()
- output = stdout.read()
+ __output = stdout.read()
stdout.close()
finally:
os.kill(child.pid, signal.SIGTERM)
- (pid, status) = os.waitpid(child.pid, os.WNOHANG)
+ (pid, __status) = os.waitpid(child.pid, os.WNOHANG)
while (pid == 0):
time.sleep(0.5)
os.kill(child.pid, signal.SIGTERM)
- (pid, status) = os.waitpid(child.pid, os.WNOHANG)
+ (pid, __status) = os.waitpid(child.pid, os.WNOHANG)
def removeDns(self, name):
cmd = "nsupdate"
@@ -196,15 +207,15 @@
stdin.write("update delete %s.%s A\n" % (name, self.dnsDomain))
stdin.write("\n")
stdin.close()
- output = stdout.read()
+ __output = stdout.read()
stdout.close()
finally:
os.kill(child.pid, signal.SIGTERM)
- (pid, status) = os.waitpid(child.pid, os.WNOHANG)
+ (pid, __status) = os.waitpid(child.pid, os.WNOHANG)
while (pid == 0):
time.sleep(0.5)
os.kill(child.pid, signal.SIGTERM)
- (pid, status) = os.waitpid(child.pid, os.WNOHANG)
+ (pid, __status) = os.waitpid(child.pid, os.WNOHANG)
def doUpdate(self, instance):
newInstance = Instance()
@@ -229,7 +240,7 @@
dhcpName = instance.name + "-nic%d" % (i)
self.log.info("Adding %s:{%s->%s} to DHCP" % (dhcpName, nic.mac, ip))
self.addDhcp(dhcpName, ip, nic.mac)
- except Exception, e:
+ except Exception:
self.log.exception("Failed to add host %s to DHCP/DNS" % (instance.name))
self.doUpdate(instance)
@@ -242,8 +253,11 @@
ip = nic.ip
try:
ipNum = self.strToIp(ip)
+ # XXXstroucki: if this fails with KeyError,
+ # we must have double-assigned the same IP
+ # address. How does this happen?
del self.usedIPs[ipNum]
- except Exception, e:
+ except Exception:
self.log.exception("Failed to remove host %s, ip %s from pool of usedIPs" % (instance.name, ip))
try:
if (i == 0):
@@ -251,9 +265,13 @@
else:
dhcpName = instance.name + "-nic%d" % (i)
self.removeDhcp(dhcpName)
- except Exception, e:
+ except Exception:
self.log.exception("Failed to remove host %s from DHCP" % (instance.name))
try:
+ # XXXstroucki: this can fail if the resolver can't
+ # resolve the dns server name (line 190). Perhaps
+ # the hostname should be then pushed onto a list
+ # to try again next time.
self.removeDns(instance.name)
- except Exception, e:
+ except Exception:
self.log.exception("Failed to remove host %s from DNS" % (instance.name))
diff --git a/src/tashi/agents/instancehook.py b/src/tashi/agents/instancehook.py
index 03b5684..cd62b40 100644
--- a/src/tashi/agents/instancehook.py
+++ b/src/tashi/agents/instancehook.py
@@ -1,5 +1,3 @@
-#! /usr/bin/env python
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -17,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# superclass for instance hooks.
+
class InstanceHook(object):
def __init__(self, config, client, post=False):
if (self.__class__ is InstanceHook):
diff --git a/src/tashi/agents/locality-server.py b/src/tashi/agents/locality-server.py
deleted file mode 100755
index ac835ed..0000000
--- a/src/tashi/agents/locality-server.py
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/usr/bin/python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# this module provides a service to locate servers that are close
-# to a VM. Uses all-pairs shortest path algorithm. Need to provide
-# a topology for the underlying physical infrastructure.
-
-from socket import gethostname
-import os
-import threading
-import time
-import socket
-
-from tashi.services.ttypes import *
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-from thrift.server import TServer
-
-from tashi.services import clustermanagerservice
-from tashi.util import getConfig, createClient
-from tashi.parallel import *
-
-import tashi.services.layoutlocality.localityservice as localityservice
-
-from numpy import *
-from scipy import *
-
-cnames = {}
-def cannonicalName(hn):
- try:
- if cnames.has_key(hn):
- return cnames[hn]
- r = socket.gethostbyname_ex(hn)[0]
- cnames[hn] = r
- return r
- except:
- return hn
-
-# define matrix multiplication that can be used to calculate a min-plus
-# distance product
-def genMul(A, B, add, mult):
- '''generalized matrix multiplication'''
- C = zeros((shape(A)[0], shape(B)[1]))
- for i in range(shape(C)[0]):
- for j in range(shape(C)[1]):
- C[i,j] = add(mult(A[i,:], B[:,j]))
- return C
-
-def addHost(graph, hostVals, host):
- if not graph.has_key(host):
- graph[host] = []
- if not hostVals.has_key(host):
- hostVals[host] = len(hostVals)
-
-def graphConnect(graph, h1, h2):
- if not h1 in graph[h2]:
- graph[h2].append(h1)
- if not h2 in graph[h1]:
- graph[h1].append(h2)
-
-def graphFromFile(fn = 'serverLayout', graph = {}, hostVals = {}):
- f = open(fn)
- for line in f.readlines():
- line = line.split()
- if len(line) < 1:
- continue
- server = cannonicalName(line[0].strip())
-
- addHost(graph, hostVals, server)
- for peer in line[1:]:
- peer = cannonicalName(peer.strip())
- addHost(graph, hostVals, peer)
- graphConnect(graph, server, peer)
- return graph, hostVals
-
-def graphFromTashi(client, transport, graph={}, hostVals={}):
- print 'getting graph'
- if not transport.isOpen():
- transport.open()
- hosts = client.getHosts()
- instances = client.getInstances()
- for instance in instances:
- host = [cannonicalName(h.name) for h in hosts if h.id == instance.hostId]
- if len(host) <1 :
- print 'cant find vm host'
- continue
- host = host[0]
- print 'host is ', host
- addHost(graph, hostVals, host)
- print 'added host'
- vmhost = cannonicalName(instance.name)
- addHost(graph, hostVals, vmhost)
- print 'added vm'
- graphConnect(graph, host, vmhost)
- print 'connected'
- print 'returning from graphFromTashi'
- return graph, hostVals
-
-
-
-def graphToArray(graph, hostVals):
- a = zeros((len(hostVals), len(hostVals)))
- for host in graph.keys():
- if not hostVals.has_key(host):
- continue
- a[hostVals[host], hostVals[host]] = 1
- for peer in graph[host]:
- if not hostVals.has_key(peer):
- continue
- a[hostVals[host], hostVals[peer]] = 1
- a[a==0] = inf
- for i in range(shape(a)[0]):
- a[i,i]=0
- return a
-
-def shortestPaths(graphArray):
- a = graphArray
- for i in range(math.ceil(math.log(shape(a)[0],2))):
- a = genMul(a,a,min,plus)
- return a
-
-def plus(A, B):
- return A + B
-
-
-def getHopCountMatrix(sourceHosts, destHosts, array, hostVals):
- a = zeros((len(sourceHosts), len(destHosts)))
- a[a==0] = inf
- for i in range(len(sourceHosts)):
- sh = cannonicalName(sourceHosts[i])
- shv = None
- if hostVals.has_key(sh):
- shv = hostVals[sh]
- else:
- print 'host not found', sh
- continue
- for j in range(len(destHosts)):
- dh = cannonicalName(destHosts[j])
- dhv = None
- if hostVals.has_key(dh):
- dhv = hostVals[dh]
- else:
- print 'dest not found', dh
- continue
- print sh, dh, i,j, shv, dhv, array[shv, dhv]
- a[i,j] = array[shv, dhv]
- return a
-
-
-class LocalityService:
- def __init__(self):
- (config, configFiles) = getConfig(["Agent"])
- self.port = int(config.get('LocalityService', 'port'))
- print 'Locality service on port %i' % self.port
- self.processor = localityservice.Processor(self)
- self.transport = TSocket.TServerSocket(self.port)
- self.tfactory = TTransport.TBufferedTransportFactory()
- self.pfactory = TBinaryProtocol.TBinaryProtocolFactory()
- self.server = TServer.TThreadedServer(self.processor,
- self.transport,
- self.tfactory,
- self.pfactory)
-
- self.hostVals =[]
- self.array = array([[]])
- self.rtime = 0
-
-
- self.fileName = os.path.expanduser(config.get("LocalityService", "staticLayout"))
- (self.client, self.transport) = createClient(config)
-
- self.server.serve()
-
- @synchronizedmethod
- def refresh(self):
- if time.time() - self.rtime < 10:
- return
- g, self.hostVals = graphFromFile(self.fileName)
- try:
- g, self.hostVals = graphFromTashi(self.client, self.transport, g, self.hostVals)
- except e:
- print e
- print 'could not get instance list from cluster manager'
- print 'graph to array'
- a = graphToArray(g, self.hostVals)
- print 'calling shortest paths ', a.shape
- self.array = shortestPaths(a)
- print 'computed shortest paths'
- print self.array
- print self.hostVals
- @synchronizedmethod
- def getHopCountMatrix(self, sourceHosts, destHosts):
- self.refresh()
- print 'getting hop count matrix for', sourceHosts, destHosts
- hcm = getHopCountMatrix(sourceHosts, destHosts, self.array, self.hostVals)
- print hcm
- return hcm
-
-
-def main():
-
- #XXXstroucki This code has not been updated for several years.
- # It may still be useful as an example.
- import sys
- sys.exit(0);
-
- ls = LocalityService()
-
-if __name__ == "__main__":
- main()
diff --git a/src/tashi/agents/mauipacket.py b/src/tashi/agents/mauipacket.py
deleted file mode 100644
index 5423db5..0000000
--- a/src/tashi/agents/mauipacket.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import subprocess
-import time
-import pseudoDes
-
-class MauiPacket:
- def __init__(self, key=0):
- self.size = 0
- self.char = '\n'
- self.chksum = '0'*16
- self.timestamp = int(time.time())
- self.auth = ''
- self.data = []
- self.msg = ''
- self.key=key
- def readPacket(self, istream):
- self.msg = ''
-
- size = istream.read(8)
- self.msg = self.msg+size
- self.size = int(size)
-
- self.char = istream.read(1)
- self.msg = self.msg + self.char
-
- packet = istream.read(self.size)
- self.msg = self.msg + packet
-
- packet = packet.split()
-
- for i in range(len(packet)):
- item = packet[i].split('=')
- if item[0] == 'CK':
- self.chksum = item[1]
- if item[0] == 'TS':
- self.timestamp = int(item[1])
- if item[0] == 'AUTH':
- self.auth = item[1]
- if item[0] == 'DT':
- self.data = packet[i:]
- self.data=self.data[0].split('=',1)[1:] + self.data[1:]
-
- def checksumMessage(self, message, key=None):
- if key == None:
- key = self.key
- if type(key) == type(''):
- key = int(key)
- chksum = pseudoDes.generateKey(message, key)
- chksum = '%016x' % chksum
- return chksum
- def getChecksum(self):
- cs = self.msg.partition('TS=')
- cs = cs[1]+cs[2]
- chksum = self.checksumMessage(cs)
- return chksum
- def verifyChecksum(self):
- chksum = self.getChecksum()
- if chksum != self.chksum:
- print 'verifyChecksum: "%s"\t"%s"'%(chksum, self.chksum)
- print 'verifyChecksum (types): %s\t%s' %(type(chksum), type(self.chksum))
- return False
- return True
- def set(self, data, auth=None, key=None, timestamp=None):
- if timestamp==None:
- timestamp = int(time.time())
- self.data = data
- if auth !=None:
- self.auth = auth
- if key != None:
- self.key = key
- self.timstamp=timestamp
- self.fixup()
- def fixup(self):
- datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
- self.chksum = self.checksumMessage(datastring)
-
- pktstring = 'CK=%s %s'%(self.chksum, datastring)
- self.size = len(pktstring)
- def __str__(self):
- datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
- self.chksum = self.checksumMessage(datastring)
-
- pktstring = 'CK=%s %s'%(self.chksum, datastring)
- self.msg = ''
- self.msg = self.msg + '%08i'%len(pktstring)
- self.msg = self.msg + self.char
- self.msg = self.msg + pktstring
-
- return self.msg
- def prettyString(self):
- s = '''Maui Packet
------------
-size:\t\t%i
-checksum:\t%s
-timestamp:\t%s
-auth:\t\t%s
-data:
-%s
------------'''
- s = s%(self.size, self.chksum, self.timestamp, self.auth, self.data)
- return s
diff --git a/src/tashi/agents/mauiwiki.py b/src/tashi/agents/mauiwiki.py
index 1af9b62..fdb3574 100755
--- a/src/tashi/agents/mauiwiki.py
+++ b/src/tashi/agents/mauiwiki.py
@@ -17,20 +17,122 @@
# specific language governing permissions and limitations
# under the License.
+# XXXstroucki: wiki is a text based resource manager that maui can
+# use. It also seems to have disappeared from the face of the web.
+# This code is unmaintained.
+
+# XXXstroucki former file mauipacket.py
+#import subprocess
import time
-import hashlib
-import sys
-import subprocess
-import socket, SocketServer
-from socket import gethostname
-import os
+import SocketServer
+from tashi.utils import pseudoDes
+from tashi.rpycservices.rpyctypes import HostState, InstanceState
+
+class MauiPacket:
+ def __init__(self, key=0):
+ self.size = 0
+ self.char = '\n'
+ self.chksum = '0'*16
+ self.timestamp = int(time.time())
+ self.auth = ''
+ self.data = []
+ self.msg = ''
+ self.key=key
+ def readPacket(self, istream):
+ self.msg = ''
+
+ size = istream.read(8)
+ self.msg = self.msg+size
+ self.size = int(size)
+
+ self.char = istream.read(1)
+ self.msg = self.msg + self.char
+
+ packet = istream.read(self.size)
+ self.msg = self.msg + packet
+
+ packet = packet.split()
+
+ for i in range(len(packet)):
+ item = packet[i].split('=')
+ if item[0] == 'CK':
+ self.chksum = item[1]
+ if item[0] == 'TS':
+ self.timestamp = int(item[1])
+ if item[0] == 'AUTH':
+ self.auth = item[1]
+ if item[0] == 'DT':
+ self.data = packet[i:]
+ self.data=self.data[0].split('=',1)[1:] + self.data[1:]
+
+ def checksumMessage(self, message, key=None):
+ if key == None:
+ key = self.key
+ if type(key) == type(''):
+ key = int(key)
+ chksum = pseudoDes.generateKey(message, key)
+ chksum = '%016x' % chksum
+ return chksum
+ def getChecksum(self):
+ cs = self.msg.partition('TS=')
+ cs = cs[1]+cs[2]
+ chksum = self.checksumMessage(cs)
+ return chksum
+ def verifyChecksum(self):
+ chksum = self.getChecksum()
+ if chksum != self.chksum:
+ print 'verifyChecksum: "%s"\t"%s"'%(chksum, self.chksum)
+ print 'verifyChecksum (types): %s\t%s' %(type(chksum), type(self.chksum))
+ return False
+ return True
+ def set(self, data, auth=None, key=None, timestamp=None):
+ if timestamp==None:
+ timestamp = int(time.time())
+ self.data = data
+ if auth !=None:
+ self.auth = auth
+ if key != None:
+ self.key = key
+ self.timestamp=timestamp
+ self.fixup()
+ def fixup(self):
+ datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
+ self.chksum = self.checksumMessage(datastring)
+
+ pktstring = 'CK=%s %s'%(self.chksum, datastring)
+ self.size = len(pktstring)
+ def __str__(self):
+ datastring = "TS=%i AUTH=%s DT=%s"%(self.timestamp, self.auth, (' '.join(self.data)))
+ self.chksum = self.checksumMessage(datastring)
+
+ pktstring = 'CK=%s %s'%(self.chksum, datastring)
+ self.msg = ''
+ self.msg = self.msg + '%08i'%len(pktstring)
+ self.msg = self.msg + self.char
+ self.msg = self.msg + pktstring
+
+ return self.msg
+ def prettyString(self):
+ s = '''Maui Packet
+-----------
+size:\t\t%i
+checksum:\t%s
+timestamp:\t%s
+auth:\t\t%s
+data:
+%s
+-----------'''
+ s = s%(self.size, self.chksum, self.timestamp, self.auth, self.data)
+ return s
+
+# XXXstroucki original file mauiwiki.py
import threading
import logging.config
from tashi.parallel import synchronizedmethod
from tashi.services.ttypes import *
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean
-from tashi.agents.mauipacket import MauiPacket
+from tashi.util import getConfig, createClient, instantiateImplementation
+#from tashi.agents.mauipacket import MauiPacket
import tashi.util
def jobnameToId(jobname):
@@ -57,24 +159,24 @@
def postDestroy(self, inst):
for hook in self.hooks:
hook.postDestroy(inst)
- def idToInst(self, id):
+ def idToInst(self, _id):
instances = self.client.getInstances()
print 'instances ', instances
- insts = [i for i in instances if str(i.id)==str(id)]
+ insts = [i for i in instances if str(i.id)==str(_id)]
if len(insts) == 0:
- raise "No instance with ID %s"%id
+ raise "No instance with ID %s"%_id
if len(insts) > 1:
- raise "Multiple instances with ID %s"%id
+ raise "Multiple instances with ID %s"%_id
inst = insts[0]
return inst
- def destroyById(self, id):
- inst = self.idToInst(id)
- self.client.destroyVm(int(id))
+ def destroyById(self, _id):
+ inst = self.idToInst(_id)
+ self.client.destroyVm(int(_id))
self.postDestroy(inst)
- def activateById(self, id, host):
- inst = self.idToInst(id)
+ def activateById(self, _id, host):
+ inst = self.idToInst(_id)
self.preCreate(inst)
- self.client.activateVm(int(id), host)
+ self.client.activateVm(int(_id), host)
def cmplists(a, b):
for i in range(len(a)):
@@ -301,8 +403,8 @@
if j.updateTime >= updatetime and j.id in joblist]
jl = {}
for job in jobs:
- id = "%s.%i"%(job.name, job.id)
- jl[id] = {'STATE':self.wikiInstanceState(job),
+ _id = "%s.%i"%(job.name, job.id)
+ jl[_id] = {'STATE':self.wikiInstanceState(job),
'UNAME':self.users[job.userId].name,
'GNAME':self.users[job.userId].name,
'UPDATETIME':int(job.updateTime),
@@ -313,14 +415,14 @@
'RMEM':str(job.memory),
'WCLIMIT':str(self.defaultJobTime)}
if job.hostId != None:
- jl[id]['TASKLIST'] = self.hosts[job.hostId].name
+ jl[_id]['TASKLIST'] = self.hosts[job.hostId].name
return jl
@synchronizedmethod
- def activateById(self, id, host):
- if not self.instances.has_key(id):
+ def activateById(self, _id, host):
+ if not self.instances.has_key(_id):
raise "no such instance"
- self.ihooks.activateById(id, host)
- self.instances[id].state=InstanceState.Activating
+ self.ihooks.activateById(_id, host)
+ self.instances[_id].state=InstanceState.Activating
class MauiListener(SocketServer.StreamRequestHandler):
def setup(self):
diff --git a/src/tashi/agents/primitive.py b/src/tashi/agents/primitive.py
index 99ef702..5014d47 100755
--- a/src/tashi/agents/primitive.py
+++ b/src/tashi/agents/primitive.py
@@ -19,16 +19,18 @@
import time
import logging.config
+import sys
from tashi.rpycservices.rpyctypes import Errors, HostState, InstanceState, TashiException
-from tashi.util import getConfig, createClient, instantiateImplementation, boolean
+from tashi.util import createClient, instantiateImplementation, boolean
+from tashi.utils.config import Config
import tashi
class Primitive(object):
- def __init__(self, config, cmclient):
+ def __init__(self, config):
self.config = config
- self.cm = cmclient
+ self.cm = createClient(config)
self.hooks = []
self.log = logging.getLogger(__file__)
self.scheduleDelay = float(self.config.get("Primitive", "scheduleDelay"))
@@ -40,10 +42,10 @@
name = name.lower()
if (name.startswith("hook")):
try:
- self.hooks.append(instantiateImplementation(value, config, cmclient, False))
+ self.hooks.append(instantiateImplementation(value, config, self.cm, False))
except:
self.log.exception("Failed to load hook %s" % (value))
- self.hosts = {}
+ self.hosts = {}
self.load = {}
self.instances = {}
self.muffle = {}
@@ -62,9 +64,9 @@
for h in self.cm.getHosts():
#XXXstroucki get all hosts here?
#if (h.up == True and h.state == HostState.Normal):
- hosts[ctr] = h
- ctr = ctr + 1
- load[h.id] = []
+ hosts[ctr] = h
+ ctr = ctr + 1
+ load[h.id] = []
load[None] = []
_instances = self.cm.getInstances()
@@ -75,8 +77,9 @@
# XXXstroucki put held machines behind pending ones
heldInstances = []
for i in instances.itervalues():
+ # Nonrunning VMs will have hostId of None, but
+ # so will Suspended VMs.
if (i.hostId or i.state == InstanceState.Pending):
- # Nonrunning VMs will have hostId of None
load[i.hostId] = load[i.hostId] + [i.id]
elif (i.hostId is None and i.state == InstanceState.Held):
heldInstances = heldInstances + [i.id]
@@ -199,14 +202,17 @@
if myDisk == i.disks[0].uri and i.disks[0].persistent == True:
count += 1
if count > 1:
- minMaxHost = None
+ minMaxHost = None
if (minMaxHost):
# found a host
if (not inst.hints.get("__resume_source", None)):
# only run preCreate hooks if newly starting
for hook in self.hooks:
- hook.preCreate(inst)
+ try:
+ hook.preCreate(inst)
+ except:
+ self.log.warning("Failed to run preCreate hook")
self.log.info("Scheduling instance %s (%d mem, %d cores, %d uid) on host %s" % (inst.name, inst.memory, inst.cores, inst.userId, minMaxHost.name))
rv = "fail"
try:
@@ -241,8 +247,21 @@
def start(self):
oldInstances = {}
+ # XXXstroucki: scheduling races have been observed, where
+ # a vm is scheduled on a host that had not updated its
+ # capacity with the clustermanager, leading to overloaded
+ # hosts. I think the place to insure against this happening
+ # is in the nodemanager. This scheduler will keep an
+ # internal state of cluster loading, but that is best
+ # effort and will be refreshed from CM once the buffer
+ # of vms to be scheduled is exhausted.
+
while True:
try:
+ # XXXstroucki: to get a list of vms to be
+ # scheduled, it asks the CM for a full
+ # cluster state, and will look at those
+ # without a host.
self.__getState()
# Check for VMs that have exited and call
@@ -250,7 +269,7 @@
for i in oldInstances:
# XXXstroucki what about paused and saved VMs?
# XXXstroucki: do we need to look at Held VMs here?
- if (i not in self.instances and (oldInstances[i].state == InstanceState.Running or oldInstances[i].state == InstanceState.Destroying)):
+ if (i not in self.instances and (oldInstances[i].state == InstanceState.Running or oldInstances[i].state == InstanceState.Destroying or oldInstances[i].state == InstanceState.ShuttingDown)):
self.log.info("VM exited: %s" % (oldInstances[i].name))
for hook in self.hooks:
hook.postDestroy(oldInstances[i])
@@ -280,13 +299,22 @@
time.sleep(self.scheduleDelay)
def main():
- (config, configFiles) = getConfig(["Agent"])
+ config = Config(["Agent"])
+ configFiles = config.getFiles()
+
publisher = instantiateImplementation(config.get("Agent", "publisher"), config)
tashi.publisher = publisher
- cmclient = createClient(config)
logging.config.fileConfig(configFiles)
- agent = Primitive(config, cmclient)
- agent.start()
+ agent = Primitive(config)
+
+ try:
+ agent.start()
+ except KeyboardInterrupt:
+ pass
+
+ log = logging.getLogger(__file__)
+ log.info("Primitive exiting")
+ sys.exit(0)
if __name__ == "__main__":
main()
diff --git a/src/tashi/agents/primitive_zoni.py b/src/tashi/agents/primitive_zoni.py
index c770e75..34fd0d7 100755
--- a/src/tashi/agents/primitive_zoni.py
+++ b/src/tashi/agents/primitive_zoni.py
@@ -17,6 +17,11 @@
# specific language governing permissions and limitations
# under the License.
+# XXXstroucki: this apparently originated from a copy of the primitive
+# scheduler code sometime in 2010. It aims to keep a pool of tashi servers
+# available, and other servers shut down. Could this be better suited for
+# a hook function of the scheduler?
+
from socket import gethostname
import os
import socket
diff --git a/src/tashi/client/client.py b/src/tashi/client/client.py
deleted file mode 100755
index 71b5b20..0000000
--- a/src/tashi/client/client.py
+++ /dev/null
@@ -1,213 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import inspect
-import os
-import sys
-import types
-from tashi.services.ttypes import *
-from thrift.protocol.TBinaryProtocol import TBinaryProtocol
-from thrift.transport.TTransport import TBufferedTransport
-from thrift.transport.TSocket import TSocket
-
-from tashi.services import clustermanagerservice
-from tashi import vmStates
-
-from tashi.util import getConfig
-
-def makeHTMLTable(list):
- (stdin_r, stdin_w) = os.pipe()
-
-# XXXpipe: find number of columns in current window
- pipe = os.popen("tput cols")
- columns = pipe.read().strip()
- keys = {}
- for k in list:
- for k2 in k.__dict__.keys():
- if (not k2.endswith("Obj")):
- keys[k2] = k2
- if ('id' in keys):
- del keys['id']
- keylist = ['id'] + keys.keys()
- else:
- keylist = keys.keys()
- output = "<html>"
- output = output + "<table>"
- output = output + "<tr>"
- for k in keylist:
- output = output + "<td>%s</td>" % (k)
- output = output + "</tr>"
- for k in list:
- output = output + "<tr>"
- for k2 in keylist:
- if (k2 == "state"):
- output = output + "<td>%s</td>" % (str(vmStates[k.__dict__.get(k2, None)]))
- else:
- output = output + "<td>%s</td>" % (str(k.__dict__.get(k2, None)))
- output = output + "</tr>"
- output = output + "</table>"
- output = output + "</html>"
- pid = os.fork()
- if (pid == 0):
- os.close(stdin_w)
- os.dup2(stdin_r, 0)
- os.close(stdin_r)
- os.execl("/usr/bin/lynx", "/usr/bin/lynx", "-width=%s" % (columns), "-dump", "-stdin")
- sys.exit(-1)
- os.close(stdin_r)
- os.write(stdin_w, output)
- os.close(stdin_w)
- os.waitpid(pid, 0)
-
-def getFunction(argv):
- """Tries to determine the name of the function requested by the user -- may be called multiple times if the binary name is 'client'"""
- function = "None"
- if (len(argv) > 0):
- function = argv[0].strip()
- if (function.rfind("/") != -1):
- function = function[function.rfind("/")+1:]
- if (function.rfind(".") != -1):
- function = function[:function.rfind(".")]
- return function
-
-def getFunctionInfo(m):
- """Gets a string that describes a function from the interface"""
- f = getattr(clustermanagerservice.Iface, m)
- argspec = inspect.getargspec(f)[0][1:]
- return m + inspect.formatargspec(argspec)
-
-def usage():
- """Print program usage"""
- print "Available methods:"
- for m in methods:
- print "\t" + getFunctionInfo(m)
- print
- print "Examples:"
- print "\tgetInstances"
- print "\taddUser 'User(d={\"username\":\"foobar\"})'"
- print "\tremoveUser 2"
- print "\tcreateVM 1 1"
-
-def simpleType(obj):
- """Determines whether an object is a simple type -- used as a helper function to pprint"""
- if (type(obj) is not type([])):
- if (not getattr(obj, "__dict__", None)):
- return True
- return False
-
-def pprint(obj, depth = 0, key = None):
- """My own version of pprint that prints out a dict in a readable, but slightly more compact format"""
- valueManip = lambda x: x
- if (key):
- keyString = key + ": "
- if (key == "state"):
- valueManip = lambda x: vmStates[x]
- else:
- keyString = ""
- if (type(obj) is type([])):
- if (reduce(lambda x, y: x and simpleType(y), obj, True)):
- print (" " * (depth * INDENT)) + keyString + str(obj)
- else:
- print (" " * (depth * INDENT)) + keyString + "["
- for o in obj:
- pprint(o, depth + 1)
- print (" " * (depth * INDENT)) + "]"
- elif (getattr(obj, "__dict__", None)):
- if (reduce(lambda x, y: x and simpleType(y), obj.__dict__.itervalues(), True)):
- print (" " * (depth * INDENT)) + keyString + str(obj)
- else:
- print (" " * (depth * INDENT)) + keyString + "{"
- for (k, v) in obj.__dict__.iteritems():
- pprint(v, depth + 1, k)
- print (" " * (depth * INDENT)) + "}"
- else:
- print (" " * (depth * INDENT)) + keyString + str(valueManip(obj))
-
-def main():
- """Main function for the client program"""
- global INDENT, methods, exitCode
- exitCode = 0
- INDENT = (os.getenv("INDENT", 4))
- methods = filter(lambda x: not x.startswith("__"), clustermanagerservice.Iface.__dict__.keys())
- function = getFunction(sys.argv)
- if (function == "client"):
- function = getFunction(sys.argv[1:])
- if (function == "--makesyms"):
- for m in methods:
- os.symlink(sys.argv[0], m)
- sys.exit(0)
- if (function == "--rmsyms"):
- for m in methods:
- os.unlink(m)
- sys.exit(0)
-
- (config,configFiles) = getConfig(["Client"])
- cfgHost = config.get('Client', 'clusterManagerHost')
- cfgPort = config.get('Client', 'clusterManagerPort')
- cfgTimeout = float(config.get('Client', 'clusterManagerTimeout'))
- host = os.getenv('TASHI_CM_HOST', cfgHost)
- port = os.getenv('TASHI_CM_PORT', cfgPort)
- timeout = float(os.getenv('TASHI_CM_TIMEOUT', cfgTimeout)) * 1000.0
-
- socket = TSocket(host, int(port))
- socket.setTimeout(timeout)
- transport = TBufferedTransport(socket)
- protocol = TBinaryProtocol(transport)
- client = clustermanagerservice.Client(protocol)
- client._transport = transport
- client._transport.open()
- f = getattr(client, function, None)
- if not f:
- usage()
- sys.exit(-1)
- args = map(lambda x: eval(x), sys.argv[1:])
- try:
- res = f(*args)
- def cmp(x, y):
- try:
- if (x.id < y.id):
- return -1
- elif (y.id < x.id):
- return 1
- else:
- return 0
- except Exception, e:
- return 0
- if (type(res) == types.ListType):
- res.sort(cmp)
- if (os.getenv("USE_HTML_TABLES")):
- try:
- makeHTMLTable(res)
- except:
- pprint(res)
- else:
- pprint(res)
- except TashiException, e:
- print e.msg
- exitCode = e.errno
- except TypeError, e:
- print e
- print "\t" + getFunctionInfo(function)
- exitCode = -1
- finally:
- client._transport.close()
- sys.exit(exitCode)
-
-if __name__ == "__main__":
- main()
diff --git a/src/tashi/client/tashi-client.py b/src/tashi/client/tashi-client.py
index db24816..8afdb7d 100755
--- a/src/tashi/client/tashi-client.py
+++ b/src/tashi/client/tashi-client.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -21,8 +21,10 @@
import random
import sys
import types
-from tashi.rpycservices.rpyctypes import *
-from tashi import vmStates, hostStates, boolean, getConfig, stringPartition, createClient
+from tashi.rpycservices.rpyctypes import NetworkConfiguration,\
+ DiskConfiguration, HostState, Instance, Host, TashiException
+from tashi.utils.config import Config
+from tashi import vmStates, hostStates, boolean, stringPartition, createClient
users = {}
networks = {}
@@ -50,6 +52,22 @@
return users[user].id
raise ValueError("Unknown user %s" % (userStr))
+def checkHid(host):
+ userId = getUser()
+ hosts = client.getHosts()
+ hostId = None
+ try:
+ hostId = int(host)
+ except:
+ for h in hosts:
+ if (h.name == host):
+ hostId = h.id
+ if (hostId is None):
+ raise ValueError("Unknown host %s" % (str(host)))
+
+ # XXXstroucki permissions for host related stuff?
+ return hostId
+
def checkIid(instance):
userId = getUser()
instances = client.getInstances()
@@ -78,10 +96,17 @@
def getDefaultNetwork():
fetchNetworks()
- networkId = 1
+ networkId = 0
for network in networks:
+ if (getattr(networks[network], "default", False) is True):
+ networkId = network
+ break
+
+ # Naming the network "default" is deprecated, and
+ # this functionality will be removed soon
if (networks[network].name == "default"):
networkId = network
+ break
return networkId
def randomNetwork():
@@ -93,7 +118,7 @@
disks = []
for strDisk in strDisks:
strDisk = strDisk.strip()
- (l, s, r) = stringPartition(strDisk, ":")
+ (l, __s, r) = stringPartition(strDisk, ":")
if (r == ""):
r = "False"
r = boolean(r)
@@ -109,12 +134,12 @@
nics = []
for strNic in strNics:
strNic = strNic.strip()
- (l, s, r) = stringPartition(strNic, ":")
+ (l, __s, r) = stringPartition(strNic, ":")
n = l
if (n == ''):
n = getDefaultNetwork()
n = int(n)
- (l, s, r) = stringPartition(r, ":")
+ (l, __s, r) = stringPartition(r, ":")
ip = l
if (ip == ''):
ip = None
@@ -133,7 +158,7 @@
hints = {}
for strHint in strHints:
strHint = strHint.strip()
- (l, s, r) = stringPartition(strHint, "=")
+ (l, __s, r) = stringPartition(strHint, "=")
hints[l] = r
return hints
except:
@@ -186,12 +211,26 @@
instances.append(client.createVm(instance))
return instances
+def shutdownMany(basename):
+ return __shutdownOrDestroyMany("shutdown", basename)
+
def destroyMany(basename):
+ return __shutdownOrDestroyMany("destroy", basename)
+
+def __shutdownOrDestroyMany(method, basename):
instances = client.getInstances()
count = 0
for i in instances:
if (i.name.startswith(basename + "-") and i.name[len(basename)+1].isdigit()):
- client.destroyVm(i.id)
+ if method == "shutdown":
+ client.shutdownVm(i.id)
+
+ elif method == "destroy":
+ client.destroyVm(i.id)
+
+ else:
+ raise ValueError("Unknown method")
+
count = count + 1
if (count == 0):
raise ValueError("That is an unused basename")
@@ -213,6 +252,7 @@
'copyImage': (None, None),
'createVm': (None, ['id', 'hostId', 'name', 'user', 'state', 'disk', 'memory', 'cores']),
'createMany': (createMany, ['id', 'hostId', 'name', 'user', 'state', 'disk', 'memory', 'cores']),
+'shutdownMany': (shutdownMany, None),
'destroyMany': (destroyMany, None),
'getVmLayout': (getVmLayout, ['id', 'name', 'state', 'instances', 'usedMemory', 'memory', 'usedCores', 'cores']),
'getInstances': (None, ['id', 'hostId', 'name', 'user', 'state', 'disk', 'memory', 'cores']),
@@ -225,6 +265,7 @@
'createMany': [('userId', int, getUser, False), ('basename', str, lambda: requiredArg('basename'), True), ('cores', int, lambda: 1, False), ('memory', int, lambda: 128, False), ('disks', parseDisks, lambda: requiredArg('disks'), True), ('nics', parseNics, randomNetwork, False), ('hints', parseHints, lambda: {}, False), ('count', int, lambda: requiredArg('count'), True)],
'shutdownVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
'destroyVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
+'shutdownMany': [('basename', str, lambda: requiredArg('basename'), True)],
'destroyMany': [('basename', str, lambda: requiredArg('basename'), True)],
'suspendVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
'resumeVm': [('instance', checkIid, lambda: requiredArg('instance'), True)],
@@ -235,6 +276,7 @@
'getImages': [],
'copyImage': [('src', str, lambda: requiredArg('src'),True), ('dst', str, lambda: requiredArg('dst'), True)],
'getHosts': [],
+'setHostState': [('host', checkHid, lambda: requiredArg('host'), True), ('state', str, lambda: requiredArg('state'), True)],
'getUsers': [],
'getNetworks': [],
'getInstances': [],
@@ -250,6 +292,7 @@
'createMany': '[Instance(d={"userId":userId,"name":basename,"cores":cores,"memory":memory,"disks":disks,"nics":nics,"hints":hints}), count]',
'shutdownVm': '[instance]',
'destroyVm': '[instance]',
+'shutdownMany': '[basename]',
'destroyMany': '[basename]',
'suspendVm': '[instance]',
'resumeVm': '[instance]',
@@ -260,6 +303,7 @@
'unregisterHost' : '[hostId]',
'getSlots' : '[cores, memory]',
'copyImage' : '[src, dst]',
+'setHostState' : '[host, state]',
}
# Descriptions
@@ -268,6 +312,7 @@
'createMany': 'Utility function that creates many VMs with the same set of parameters',
'shutdownVm': 'Attempts to shutdown a VM nicely',
'destroyVm': 'Immediately destroys a VM -- it is the same as unplugging a physical machine and should be used for non-persistent VMs or when all else fails',
+'shutdownMany': 'Attempts to gracefully shut down a group of VMs created with createMany',
'destroyMany': 'Destroys a group of VMs created with createMany',
'suspendVm': 'Suspends a running VM to disk',
'resumeVm': 'Resumes a suspended VM from disk',
@@ -276,6 +321,7 @@
'unpauseVm': 'Unpauses a paused VM',
'getSlots': 'Get a count of how many VMs could be started in the cluster',
'getHosts': 'Gets a list of hosts running Node Managers',
+'setHostState': 'Set the state of a host, eg. Normal or Drained',
'getUsers': 'Gets a list of users',
'getNetworks': 'Gets a list of available networks for VMs to be placed on',
'getInstances': 'Gets a list of all VMs in the cluster',
@@ -293,6 +339,7 @@
'createMany': ['--basename foobar --disks i386-hardy.qcow2 --count 4'],
'shutdownVm': ['--instance 12345', '--instance foobar'],
'destroyVm': ['--instance 12345', '--instance foobar'],
+'shutdownMany': ['--basename foobar'],
'destroyMany': ['--basename foobar'],
'suspendVm': ['--instance 12345', '--instance foobar'],
'resumeVm': ['--instance 12345', '--instance foobar'],
@@ -301,6 +348,7 @@
'unpauseVm': ['--instance 12345', '--instance foobar'],
'getSlots': ['--cores 1 --memory 128'],
'getHosts': [''],
+'setHostState': ['--host fnord --state Drained'],
'getUsers': [''],
'getNetworks': [''],
'getInstances': [''],
@@ -321,7 +369,8 @@
print "Unknown function %s" % (func)
print
functions = argLists
- print "%s is the client program for Tashi, a system for cloud-computing on BigData." % (os.path.basename(sys.argv[0]))
+ print "%s is the client program for Tashi" % (os.path.basename(sys.argv[0]))
+ print "Tashi, a system for cloud-computing on BigData"
print "Visit http://incubator.apache.org/tashi/ for more information."
print
else:
@@ -373,9 +422,9 @@
except:
obj.state = 'Unknown'
-def genKeys(list):
+def genKeys(_list):
keys = {}
- for row in list:
+ for row in _list:
for item in row.__dict__.keys():
keys[item] = item
if ('id' in keys):
@@ -385,25 +434,25 @@
keys = keys.values()
return keys
-def makeTable(list, keys=None):
- (consoleWidth, consoleHeight) = (9999, 9999)
+def makeTable(_list, keys=None):
+ (consoleWidth, __consoleHeight) = (9999, 9999)
try:
# XXXpipe: get number of rows and column on current window
stdout = os.popen("stty size")
- r = stdout.read()
+ __r = stdout.read()
stdout.close()
except:
pass
- for obj in list:
+ for obj in _list:
transformState(obj)
if (keys == None):
- keys = genKeys(list)
+ keys = genKeys(_list)
for (show, k) in show_hide:
if (show):
if (k != "all"):
keys.append(k)
else:
- keys = genKeys(list)
+ keys = genKeys(_list)
else:
if (k in keys):
keys.remove(k)
@@ -412,7 +461,7 @@
maxWidth = {}
for k in keys:
maxWidth[k] = len(k)
- for row in list:
+ for row in _list:
for k in keys:
if (k in row.__dict__):
maxWidth[k] = max(maxWidth[k], len(str(row.__dict__[k])))
@@ -445,8 +494,8 @@
return 1
else:
return 0
- list.sort(cmp=sortFunction)
- for row in list:
+ _list.sort(cmp=sortFunction)
+ for row in _list:
line = ""
for k in keys:
row.__dict__[k] = row.__dict__.get(k, "")
@@ -507,11 +556,12 @@
"""Main function for the client program"""
global INDENT, exitCode, client
exitCode = 0
+ exception = None
INDENT = (os.getenv("INDENT", 4))
if (len(sys.argv) < 2):
usage()
function = matchFunction(sys.argv[1])
- (config, configFiles) = getConfig(["Client"])
+ config = Config(["Client"])
# build a structure of possible arguments
possibleArgs = {}
@@ -551,30 +601,54 @@
if (arg.startswith("--")):
if (arg[2:] in possibleArgs):
(parg, conv, default, required) = possibleArgs[arg[2:]]
- val = conv(args.pop(0))
+ try:
+ val = None
+ lookahead = args[0]
+ if not lookahead.startswith("--"):
+ val = args.pop(0)
+ except:
+ pass
+
+ val = conv(val)
if (val == None):
val = default()
vals[parg] = val
continue
+ # somewhat lame, but i don't want to rewrite the fn at this time
+ exception = ValueError("Unknown argument %s" % (arg))
- raise ValueError("Unknown argument %s" % (arg))
-
-
- f = getattr(client, function, None)
+ f = None
+ try:
+ f = extraViews[function][0]
+ except:
+ pass
if (f is None):
- f = extraViews[function][0]
- if (function in convertArgs):
- fargs = eval(convertArgs[function], globals(), vals)
- else:
- fargs = []
- res = f(*fargs)
+ f = getattr(client, function, None)
+
+ try:
+ if exception is not None:
+ raise exception
+
+ if (function in convertArgs):
+ fargs = eval(convertArgs[function], globals(), vals)
+ else:
+ fargs = []
+
+ res = f(*fargs)
+ except Exception, e:
+ print "Failed in calling %s: %s" % (function, e)
+ print "Please run tashi-client --examples for syntax information"
+ sys.exit(-1)
+
if (res != None):
keys = extraViews.get(function, (None, None))[1]
try:
if (type(res) == types.ListType):
makeTable(res, keys)
+ elif (type(res) == types.StringType):
+ print res
else:
makeTable([res], keys)
diff --git a/src/tashi/client/test.py b/src/tashi/client/test.py
deleted file mode 100644
index a53eefa..0000000
--- a/src/tashi/client/test.py
+++ /dev/null
@@ -1,314 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import unittest
-import logging
-import sys
-import signal
-import os.path
-import copy
-import time
-import random
-from ConfigParser import ConfigParser
-
-from tashi.services.ttypes import *
-from thrift.transport.TSocket import TSocket
-from thrift.protocol.TBinaryProtocol import TBinaryProtocol
-from thrift.transport.TTransport import TBufferedTransport
-
-from tashi.services import clustermanagerservice
-from tashi.messaging.threadpool import synchronized
-from tashi.messaging.tashimessaging import TestTashiSubscriber
-
-from tashi.util import getConfig
-
-import tashi.client.client
-
-class ClientConnection(object):
- '''Creates an rpc proxy'''
- def __init__(self, host, port):
- self.host = host
- self.port = port
- self.transport = TBufferedTransport(TSocket(host, int(port)))
- self.protocol = TBinaryProtocol(self.transport)
- self.client = clustermanagerservice.Client(self.protocol)
- self.client._transport = self.transport
- self.client._transport.open()
- def __del__(self):
- self.client._transport.close()
-
-def incrementor(init=0):
- while 1:
- yield init
- init = init + 1
-
-# FIXME: don't duplicate code from clustermanager
-# def getConfig(args):
-# config = ConfigParser()
-# configFiles = [
-# '/usr/share/tashi/ClusterManagerDefaults.cfg',
-# '/etc/tashi/ClusterManager.cfg',
-# os.path.expanduser('~/.tashi/ClusterManager.cfg')
-# ] + ([args[0]] if len(args) > 0 else [])
-
-# configFiles = config.read(configFiles)
-# if len(configFiles) == 0:
-# print >>sys.stderr, 'Unable to find the configuration file\n'
-# sys.exit(3)
-
-# return config
-
-
-class TestClient(unittest.TestCase):
- @synchronized()
- def getPortNum(self):
- return self.portnum.next()
-
- """macro test cases for single-host tests
-
- Assumes cwd is 'src/tashi/client/'
- """
- def setUp(self):
- """Create a CM and single NM on local host"""
- logging.info('setting up test')
-
- (self.config, self.configfiles) = getConfig([])
-
- self.port = 1717 # FIXME: take this (and other things) from config file
- self.portnum = incrementor(self.port)
-
- self.cwd = os.getcwd()
- self.srcd = os.path.dirname(os.path.dirname(self.cwd))
-
- self.environ = copy.copy(os.environ)
- self.environ['PYTHONPATH'] = self.srcd
- logging.info('base path = %s' % self.srcd)
-
- self.nm = os.spawnlpe(os.P_NOWAIT, 'python', 'python',
- os.path.join(self.srcd, 'tashi', 'nodemanager', 'nodemanager.py'),
- self.environ)
- self.cm = os.spawnlpe(os.P_WAIT, 'python', 'python',
- os.path.join(self.srcd, 'tashi', 'clustermanager', 'clustermanager.py'),
- '--drop', '--create',
- os.path.expanduser('~/.tashi/ClusterManager.cfg'),
- self.environ)
- self.cm = os.spawnlpe(os.P_NOWAIT, 'python', 'python',
- os.path.join(self.srcd, 'tashi', 'clustermanager', 'clustermanager.py'),
- os.path.expanduser('~/.tashi/ClusterManager.cfg'),
- self.environ)
- # since we are spawning with P_NOWAIT, we need to sleep to ensure that the CM is listening
- time.sleep(1)
- try:
- self.connection = ClientConnection('localhost', self.config.get('ClusterManagerService', 'port'))
- except Exception, e:
- logging.warning('client connection failed')
- ex = None
- try:
- logging.warning("setUp killing node manager " + str(self.nm))
- os.kill(self.nm, signal.SIGKILL)
- except Exception, e:
- ex = e
- logging.warning('could not kill node manager: '+ str(e))
- try:
- logging.warning('setUp killing cluster manager ' + str(self.cm))
- os.kill(self.cm, signal.SIGKILL)
- except Exception, e:
- ex = e
- logging.warning('could not kill cluster manager: ' + str(e))
- if e != None:
- raise e
-
- logging.info('node manager PID: %i' % self.nm)
- def tearDown(self):
- '''Kill the CM and NM that were created by setUP'''
- logging.info('tearing down test')
- ex = None
- try:
- logging.debug("killing cluster manager " + str(self.cm))
- os.kill(self.cm, signal.SIGKILL)
- except Exception, e:
- ex = e
- logging.error('Could not kill cluster manager: ' + str(e))
-
- try:
- logging.debug("killing node manager " + str(self.nm))
- os.kill(self.nm, signal.SIGKILL)
- except Exception, e:
- ex = e
- logging.error('Could not kill node manager: ' + str(e))
- if ex != None:
- raise ex
- def testSetup(self):
- '''empty test to ensure that setUp code works'''
- logging.info('setting up')
- def testHostManagement(self):
- '''test adding/removing/listing hosts
-
- Right now this just adds a single host: localhost. Eventually
- it should 1) take a list of hosts from a test configuration
- file, 2) ensure that all were added, 3) remove a random
- subset, 4) ensure that they were correctly removed, 5) remove
- all, 6) ensure that they were correctly removed.'''
-
- # get empty host list
- hosts = self.connection.client.getHosts()
- self.assertEqual(hosts, [], 'starting host list not empty: ' + str(hosts) )
-
- # add a host
- host = Host()
- host.hostname = 'localhost'
- host.enabled=True
- self.connection.client.addHost(host)
- hosts = self.connection.client.getHosts()
- self.assertEqual(len(hosts), 1, 'wrong number of hosts %i, should be %i' % (len(hosts), 1) )
- self.assertEqual(hosts[0].hostname, 'localhost', 'wrong hostname: ' + str(hosts[0].hostname) )
-
- # remove first host
- hid = hosts[0].id
- self.connection.client.removeHost(hid)
- hosts = self.connection.client.getHosts()
- self.assertEqual(hosts, [], 'host list not empty after remove: ' + str(hosts) )
-
- def testMessaging(self):
- '''test messaging system started by CM
-
- tests messages published directly, through events in the CM,
- and the log system'''
- # FIXME: add tests for generating events as a side-effect of
- # rpc commands, as well as logging in the CM
- portnum = self.getPortNum()
- self.sub = TestTashiSubscriber(self.config, portnum)
- self.assertEqual(self.sub.messageQueue.qsize(), 0)
- self.pub = tashi.messaging.thriftmessaging.PublisherThrift(self.config.get('MessageBroker', 'host'),
- int(self.config.get('MessageBroker', 'port')))
- self.pub.publish({'message-type':'text', 'message':'Hello World!'})
- time.sleep(0.5)
- print '*** QSIZE', self.sub.messageQueue.qsize()
- self.assertEqual(self.sub.messageQueue.qsize(), 1)
-
- self.log = logging.getLogger(__name__)
- messageHandler = tashi.messaging.tashimessaging.TashiLogHandler(self.config)
- self.log.addHandler(messageHandler)
- # FIXME: why can't we log messages with severity below 'warning'?
- self.log.warning('test log message')
- time.sleep(0.5)
- self.assertEqual(self.sub.messageQueue.qsize(), 2)
-
- # This should generate at least one log message
-# hosts = self.connection.client.getHosts()
-# time.sleep(0.5)
-# if (self.sub.messageQueue.qsize() <= 2):
-# self.fail()
-
- def testUserManagement(self):
- '''test adding/removing/listing users
-
- same as testHostManagement, but with users'''
- usernames = ['sleepy', 'sneezy', 'dopey', 'doc',
- 'grumpy', 'bashful', 'happy']
- # add all users
- for un in usernames:
- user = User()
- user.username = un
- self.connection.client.addUser(user)
- # ensure that all were added
- users = self.connection.client.getUsers()
- self.assertEqual(len(usernames), len(users))
- for user in users:
- usernames.remove(user.username)
- self.assertEqual(0, len(usernames))
- # remove a random subset
- rm = random.sample(users, 4)
- for user in rm:
- self.connection.client.removeUser(user.id)
- users.remove(user)
- newUsers = self.connection.client.getUsers()
- # This ensures that the remaining ones are what we expect:
- for user in newUsers:
- # if there is a user remaining that we asked to be removed,
- # this will throw an exception
- users.remove(user)
- # if a user was removed that we did not intend, this will
- # throw an exception
- self.assertEqual(0, len(users))
-
-# def testInstanceConfigurationManagement(self):
-# '''test adding/removing/listing instance configurations
-
-# same as testHostManagement, but with instance configurations'''
-# self.fail('test not implemented')
- def testHardDiskConfigurationManagement(self):
- '''test adding/removing/listing hard disk configurations
-
- same as testHostManagement, but with hard disk configurations'''
-
- user = User(d={'username':'sleepy'})
- self.connection.client.addUser(user)
- users = self.connection.client.getUsers()
-
- per = PersistentImage()
- per.userId = users[0].id
- per.name = 'sleepy-PersistentImage'
- self.connection.client.addPersistentImage(per)
- pers = self.connection.client.getPersistentImages()
-
- inst = InstanceConfiguration()
- inst.name = 'sleepy-inst'
- inst.memory = 512
- inst.cores = 1
- self.connection.client.addInstanceConfiguration(inst)
- insts = self.connection.client.getInstanceConfigurations()
-
- hdc = HardDiskConfiguration()
- hdc.index = 0
- hdc.persistentImageId = pers[0].id
- hdc.persistent = False
- hdc.instanceConfigurationId = insts[0].id
-
-# def testCreateDestroyShutdown(self):
-# '''test creating/destroying/shutting down VMs
-
-# not implemented'''
-# self.fail('test not implemented')
-# def testSuspendResume(self):
-# '''test suspending/resuming VMs
-
-# not implemented'''
-# self.fail('test not implemented')
-# def testMigrate(self):
-# '''test migration
-
-# not implemented'''
-# self.fail('test not implemented')
-# def testPauseUnpause(self):
-# '''test pausing/unpausing VMs
-
-# not implemented'''
-# self.fail('test not implemented')
-
-
-##############################
-# Test Code
-##############################
-if __name__ == '__main__':
- logging.basicConfig(level=logging.NOTSET,
- format="%(asctime)s %(levelname)s:\t %(message)s",
- stream=sys.stdout)
-
- suite = unittest.TestLoader().loadTestsFromTestCase(TestClient)
- unittest.TextTestRunner(verbosity=2).run(suite)
-
diff --git a/src/tashi/clustermanager/clustermanager.py b/src/tashi/clustermanager/clustermanager.py
index db61194..83131cf 100755
--- a/src/tashi/clustermanager/clustermanager.py
+++ b/src/tashi/clustermanager/clustermanager.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -17,11 +17,12 @@
# specific language governing permissions and limitations
# under the License.
+import os
import sys
-import signal
import logging.config
-from tashi.util import signalHandler, boolean, instantiateImplementation, getConfig, debugConsole
+from tashi.util import boolean, instantiateImplementation, debugConsole
+from tashi.utils.config import Config
import tashi
from tashi.rpycservices import rpycservices
@@ -46,6 +47,9 @@
users[config.get('AllowedUsers', 'nodeManagerUser')] = config.get('AllowedUsers', 'nodeManagerPassword')
users[config.get('AllowedUsers', 'agentUser')] = config.get('AllowedUsers', 'agentPassword')
authenticator = TlsliteVdbAuthenticator.from_dict(users)
+
+ # XXXstroucki ThreadedServer is liable to have exceptions
+ # occur within if an endpoint is lost.
t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('ClusterManagerService', 'port')), auto_register=False, authenticator=authenticator)
else:
t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('ClusterManagerService', 'port')), auto_register=False)
@@ -54,24 +58,17 @@
t.service._type = 'ClusterManagerService'
debugConsole(globals())
-
- try:
- t.start()
- except KeyboardInterrupt:
- handleSIGTERM(signal.SIGTERM, None)
-@signalHandler(signal.SIGTERM)
-def handleSIGTERM(signalNumber, stackFrame):
- global log
+ t.start()
+ # shouldn't exit by itself
+ return
- log.info('Exiting cluster manager after receiving a SIGINT signal')
- sys.exit(0)
-
def main():
global log
# setup configuration and logging
- (config, configFiles) = getConfig(["ClusterManager"])
+ config = Config(["ClusterManager"])
+ configFiles = config.getFiles()
publisher = instantiateImplementation(config.get("ClusterManager", "publisher"), config)
tashi.publisher = publisher
logging.config.fileConfig(configFiles)
@@ -80,7 +77,32 @@
# bind the database
log.info('Starting cluster manager')
- startClusterManager(config)
+
+ # handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+ child = os.fork()
+
+ if child == 0:
+ startClusterManager(config)
+ # shouldn't exit by itself
+ sys.exit(0)
+
+ else:
+ # main
+ try:
+ os.waitpid(child, 0)
+ except KeyboardInterrupt:
+ log.info("Exiting cluster manager after receiving a SIGINT signal")
+ os._exit(0)
+ except Exception:
+ log.exception("Abnormal termination of cluster manager")
+ os._exit(-1)
+
+ log.info("Exiting cluster manager after service thread exited")
+ os._exit(-1)
+
+ return
+
+
if __name__ == "__main__":
main()
diff --git a/src/tashi/clustermanager/clustermanagerservice.py b/src/tashi/clustermanager/clustermanagerservice.py
index 284ffcb..fd56db3 100644
--- a/src/tashi/clustermanager/clustermanagerservice.py
+++ b/src/tashi/clustermanager/clustermanagerservice.py
@@ -19,9 +19,8 @@
import threading
import time
-from tashi.rpycservices import rpycservices
-from tashi.rpycservices.rpyctypes import Errors, InstanceState, HostState, TashiException
-from tashi import boolean, ConnectionManager, vmStates, version, scrubString
+from tashi.rpycservices.rpyctypes import Errors, InstanceState, Instance, HostState, TashiException
+from tashi import boolean, ConnectionManager, vmStates, hostStates, version, scrubString
class ClusterManagerService(object):
"""RPC service for the ClusterManager"""
@@ -36,7 +35,7 @@
else:
self.username = None
self.password = None
- self.proxy = ConnectionManager(self.username, self.password, int(self.config.get('ClusterManager', 'nodeManagerPort')))
+ self.proxy = ConnectionManager(self.username, self.password, int(self.config.get('ClusterManager', 'nodeManagerPort')), authAndEncrypt=self.authAndEncrypt)
self.dfs = dfs
self.convertExceptions = boolean(config.get('ClusterManagerService', 'convertExceptions'))
self.log = logging.getLogger(__name__)
@@ -49,6 +48,9 @@
self.allowMismatchedVersions = boolean(self.config.get('ClusterManagerService', 'allowMismatchedVersions'))
self.maxMemory = int(self.config.get('ClusterManagerService', 'maxMemory'))
self.maxCores = int(self.config.get('ClusterManagerService', 'maxCores'))
+
+ self.defaultNetwork = self.config.getint('ClusterManagerService', 'defaultNetwork', 0)
+
self.allowDuplicateNames = boolean(self.config.get('ClusterManagerService', 'allowDuplicateNames'))
self.accountingHost = None
@@ -62,7 +64,7 @@
self.__initAccounting()
self.__initCluster()
- threading.Thread(target=self.__monitorCluster).start()
+ threading.Thread(name="monitorCluster", target=self.__monitorCluster).start()
def __initAccounting(self):
self.accountBuffer = []
@@ -71,7 +73,7 @@
try:
if (self.accountingHost is not None) and \
(self.accountingPort is not None):
- self.accountingClient=rpycservices.client(self.accountingHost, self.accountingPort)
+ self.accountingClient = ConnectionManager(self.username, self.password, self.accountingPort)[self.accountingHost]
except:
self.log.exception("Could not init accounting")
@@ -126,7 +128,7 @@
except:
self.log.exception("Invalid host data")
- secondary = ','.join(filter(None, (hostText, instanceText)))
+ secondary = ','.join(filter(None, (hostText, instanceText)))
line = "%s|%s|%s" % (now, text, secondary)
@@ -232,7 +234,7 @@
# get a list of VMs running on host
try:
hostProxy = self.proxy[host.name]
- remoteInstances = [hostProxy.getVmInfo(vmId) for vmId in hostProxy.listVms()]
+ remoteInstances = [self.__getVmInfo(host.name, vmId) for vmId in hostProxy.listVms()]
except:
self.log.warning('Failure getting instances from host %s' % (host.name))
self.data.releaseHost(host)
@@ -241,6 +243,9 @@
# register instances I don't know about
for instance in remoteInstances:
if (instance.id not in myInstances):
+ if instance.state == InstanceState.Exited:
+ self.log.warning("%s telling me about exited instance %s, ignoring." % (host.name, instance.id))
+ continue
instance.hostId = host.id
instance = self.data.registerInstance(instance)
self.data.releaseInstance(instance)
@@ -269,18 +274,22 @@
# iterate through all VMs I believe are active
for instanceId in self.instanceLastContactTime.keys():
- # Don't query non-running VMs. eg. if a VM
- # is suspended, and has no host, then there's
- # no one to ask
- if instance.state != InstanceState.Running and \
- instance.state != InstanceState.Activating and \
- instance.state != InstanceState.Orphaned:
- continue
# XXXstroucki should lock instance here?
- if (self.instanceLastContactTime[instanceId] < (self.__now() - self.allowDecayed)):
+ try:
+ lastContactTime = self.instanceLastContactTime[instanceId]
+ except KeyError:
+ continue
+
+ if (lastContactTime < (self.__now() - self.allowDecayed)):
try:
instance = self.data.acquireInstance(instanceId)
+ # Don't query non-running VMs. eg. if a VM
+ # is suspended, and has no host, then there's
+ # no one to ask
+ if instance.state not in [InstanceState.Running, InstanceState.Activating, InstanceState.Orphaned]:
+ self.data.releaseInstance(instance)
+ continue
except:
continue
@@ -293,22 +302,34 @@
# get updated state on VM
try:
- hostProxy = self.proxy[host.name]
- newInstance = hostProxy.getVmInfo(instance.vmId)
+ newInstance = self.__getVmInfo(host.name, instance.vmId)
except:
self.log.warning('Failure getting data for instance %s from host %s' % (instance.name, host.name))
self.data.releaseInstance(instance)
continue
- # replace existing state with new state
- # XXXstroucki more?
- instance.state = newInstance.state
- self.instanceLastContactTime[instanceId] = self.__now()
- instance.decayed = False
- self.data.releaseInstance(instance)
+ # update the information we have on the vm
+ #before = instance.state
+ rv = self.__vmUpdate(instance, newInstance, None)
+ if (rv == "release"):
+ self.data.releaseInstance(instance)
+
+ if (rv == "remove"):
+ self.data.removeInstance(instance)
- def normalize(self, instance):
+ def __getVmInfo(self, host, vmid):
+ hostProxy = self.proxy[host]
+ rv = hostProxy.getVmInfo(vmid)
+ if isinstance(rv, Exception):
+ raise rv
+
+ if not isinstance(rv, Instance):
+ raise ValueError
+
+ return rv
+
+ def __normalize(self, instance):
instance.id = None
instance.vmId = None
instance.hostId = None
@@ -336,18 +357,20 @@
del instance.hints[hint]
return instance
+ # extern
def createVm(self, instance):
"""Function to add a VM to the list of pending VMs"""
# XXXstroucki: check for exception here
- instance = self.normalize(instance)
+ instance = self.__normalize(instance)
instance = self.data.registerInstance(instance)
self.data.releaseInstance(instance)
self.__ACCOUNT("CM VM REQUEST", instance=instance)
return instance
-
+
+ # extern
def shutdownVm(self, instanceId):
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Running, InstanceState.ShuttingDown)
+ self.__stateTransition(instance, None, InstanceState.ShuttingDown)
self.data.releaseInstance(instance)
self.__ACCOUNT("CM VM SHUTDOWN", instance=instance)
hostname = self.data.getHost(instance.hostId).name
@@ -357,7 +380,8 @@
self.log.exception('shutdownVm failed for host %s vmId %d' % (instance.name, instance.vmId))
raise
return
-
+
+ # extern
def destroyVm(self, instanceId):
instance = self.data.acquireInstance(instanceId)
if (instance.state is InstanceState.Pending or instance.state is InstanceState.Held):
@@ -365,7 +389,7 @@
self.data.removeInstance(instance)
elif (instance.state is InstanceState.Activating):
self.__ACCOUNT("CM VM DESTROY STARTING", instance=instance)
- self.__stateTransition(instance, InstanceState.Activating, InstanceState.Destroying)
+ self.__stateTransition(instance, None, InstanceState.Destroying)
self.data.releaseInstance(instance)
else:
# XXXstroucki: This is a problem with keeping
@@ -381,15 +405,21 @@
self.proxy[hostname].destroyVm(instance.vmId)
self.data.releaseInstance(instance)
except:
- self.log.exception('destroyVm failed on host %s vmId %s' % (hostname, str(instance.vmId)))
+ self.log.warning('destroyVm failed on host %s vmId %s' % (hostname, str(instance.vmId)))
self.data.removeInstance(instance)
return
+ # extern
def suspendVm(self, instanceId):
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Running, InstanceState.Suspending)
+ try:
+ self.__stateTransition(instance, InstanceState.Running, InstanceState.Suspending)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
self.__ACCOUNT("CM VM SUSPEND", instance=instance)
hostname = self.data.getHost(instance.hostId).name
@@ -401,15 +431,22 @@
raise TashiException(d={'errno':Errors.UnableToSuspend, 'msg':'Failed to suspend %s' % (instance.name)})
return
+ # extern
def resumeVm(self, instanceId):
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Suspended, InstanceState.Pending)
+ try:
+ self.__stateTransition(instance, InstanceState.Suspended, InstanceState.Pending)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
source = "suspend/%d_%s" % (instance.id, instance.name)
instance.hints['__resume_source'] = source
self.data.releaseInstance(instance)
self.__ACCOUNT("CM VM RESUME", instance=instance)
return instance
+ # extern
def migrateVm(self, instanceId, targetHostId):
instance = self.data.acquireInstance(instanceId)
self.__ACCOUNT("CM VM MIGRATE", instance=instance)
@@ -421,7 +458,13 @@
except:
self.data.releaseInstance(instance)
raise
- self.__stateTransition(instance, InstanceState.Running, InstanceState.MigratePrep)
+
+ try:
+ self.__stateTransition(instance, InstanceState.Running, InstanceState.MigratePrep)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
try:
# Prepare the target
@@ -433,7 +476,12 @@
self.log.exception('prepReceiveVm failed')
raise
instance = self.data.acquireInstance(instance.id)
- self.__stateTransition(instance, InstanceState.MigratePrep, InstanceState.MigrateTrans)
+ try:
+ self.__stateTransition(instance, InstanceState.MigratePrep, InstanceState.MigrateTrans)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
try:
# Send the VM
@@ -449,15 +497,23 @@
try:
# Notify the target
- vmId = self.proxy[targetHost.name].receiveVm(instance, cookie)
+ __vmid = self.proxy[targetHost.name].receiveVm(instance, cookie)
except Exception:
self.log.exception('receiveVm failed')
raise
+
+ self.log.info("migrateVM finished")
return
-
+
+ # extern
def pauseVm(self, instanceId):
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Running, InstanceState.Pausing)
+ try:
+ self.__stateTransition(instance, InstanceState.Running, InstanceState.Pausing)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
self.__ACCOUNT("CM VM PAUSE", instance=instance)
hostname = self.data.getHost(instance.hostId).name
@@ -467,13 +523,24 @@
self.log.exception('pauseVm failed on host %s with vmId %d' % (hostname, instance.vmId))
raise
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Pausing, InstanceState.Paused)
+ try:
+ self.__stateTransition(instance, InstanceState.Pausing, InstanceState.Paused)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
return
+ # extern
def unpauseVm(self, instanceId):
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Paused, InstanceState.Unpausing)
+ try:
+ self.__stateTransition(instance, InstanceState.Paused, InstanceState.Unpausing)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
self.__ACCOUNT("CM VM UNPAUSE", instance=instance)
hostname = self.data.getHost(instance.hostId).name
@@ -483,25 +550,61 @@
self.log.exception('unpauseVm failed on host %s with vmId %d' % (hostname, instance.vmId))
raise
instance = self.data.acquireInstance(instanceId)
- self.__stateTransition(instance, InstanceState.Unpausing, InstanceState.Running)
+ try:
+ self.__stateTransition(instance, InstanceState.Unpausing, InstanceState.Running)
+ except TashiException:
+ self.data.releaseInstance(instance)
+ raise
+
self.data.releaseInstance(instance)
return
-
+
+ # extern
def getHosts(self):
return self.data.getHosts().values()
+ # extern
+ def setHostState(self, hostId, state):
+ state = state.lower()
+ hostState = None
+ if state == "normal":
+ hostState = HostState.Normal
+ if state == "drained":
+ hostState = HostState.Drained
+
+ if hostState is None:
+ return "%s is not a valid host state" % state
+
+ host = self.data.acquireHost(hostId)
+ try:
+ host.state = hostState
+ finally:
+ self.data.releaseHost(host)
+
+ return "Host state set to %s." % hostStates[hostState]
+
+ # extern
def getNetworks(self):
- return self.data.getNetworks().values()
-
+ networks = self.data.getNetworks()
+ for network in networks:
+ if self.defaultNetwork == networks[network].id:
+ setattr(networks[network], "default", True)
+
+ return networks.values()
+
+ # extern
def getUsers(self):
return self.data.getUsers().values()
-
+
+ # extern
def getInstances(self):
return self.data.getInstances().values()
+ # extern
def getImages(self):
return self.data.getImages()
+ # extern
def copyImage(self, src, dst):
imageSrc = self.dfs.getLocalHandle("images/" + src)
imageDst = self.dfs.getLocalHandle("images/" + dst)
@@ -515,6 +618,7 @@
except Exception, e:
self.log.exception('DFS image copy failed: %s (%s->%s)' % (e, imageSrc, imageDst))
+ # extern
def vmmSpecificCall(self, instanceId, arg):
instance = self.data.getInstance(instanceId)
hostname = self.data.getHost(instance.hostId).name
@@ -526,7 +630,7 @@
raise
return res
-# @timed
+ # extern
def registerNodeManager(self, host, instances):
"""Called by the NM every so often as a keep-alive/state polling -- state changes here are NOT AUTHORITATIVE"""
@@ -559,45 +663,47 @@
# let the host communicate what it is running
# and note that the information is not stale
for instance in instances:
+ if instance.state == InstanceState.Exited:
+ self.log.warning("%s reporting exited instance %s, ignoring." % (host.name, instance.id))
+ continue
self.instanceLastContactTime.setdefault(instance.id, 0)
self.data.releaseHost(oldHost)
return host.id
- def vmUpdate(self, instanceId, instance, oldState):
- try:
- oldInstance = self.data.acquireInstance(instanceId)
- except TashiException, e:
- # shouldn't have a lock to clean up after here
- if (e.errno == Errors.NoSuchInstanceId):
- self.log.warning('Got vmUpdate for unknown instanceId %d' % (instanceId))
- return
- except:
- self.log.exception("Could not acquire instance")
- raise
+ def __vmUpdate(self, oldInstance, instance, oldState):
+ # this function assumes a lock is held on the instance
+ # already, and will be released elsewhere
- self.instanceLastContactTime[instanceId] = self.__now()
+ self.instanceLastContactTime[oldInstance.id] = self.__now()
oldInstance.decayed = False
- self.__ACCOUNT("CM VM UPDATE", instance=oldInstance)
if (instance.state == InstanceState.Exited):
# determine why a VM has exited
hostname = self.data.getHost(oldInstance.hostId).name
+
if (oldInstance.state not in [InstanceState.ShuttingDown, InstanceState.Destroying, InstanceState.Suspending]):
self.log.warning('Unexpected exit on %s of instance %s (vmId %d)' % (hostname, oldInstance.name, oldInstance.vmId))
+
if (oldInstance.state == InstanceState.Suspending):
self.__stateTransition(oldInstance, InstanceState.Suspending, InstanceState.Suspended)
oldInstance.hostId = None
oldInstance.vmId = None
- self.data.releaseInstance(oldInstance)
+ return "release"
+
+ if (oldInstance.state == InstanceState.MigrateTrans):
+ # Just await update from target host
+ return "release"
+
else:
del self.instanceLastContactTime[oldInstance.id]
- self.data.removeInstance(oldInstance)
+ return "remove"
+
else:
if (instance.state):
# XXXstroucki does this matter?
if (oldState and oldInstance.state != oldState):
- self.log.warning('Got vmUpdate of state from %s to %s, but the instance was previously %s' % (vmStates[oldState], vmStates[instance.state], vmStates[oldInstance.state]))
+ self.log.warning('Doing vmUpdate of state from %s to %s, but the instance was previously %s' % (vmStates[oldState], vmStates[instance.state], vmStates[oldInstance.state]))
oldInstance.state = instance.state
if (instance.vmId):
oldInstance.vmId = instance.vmId
@@ -610,11 +716,44 @@
if (oldNic.mac == nic.mac):
oldNic.ip = nic.ip
- self.data.releaseInstance(oldInstance)
+ return "release"
+
return "success"
-
+
+ # extern
+ def vmUpdate(self, instanceId, instance, oldState):
+ try:
+ oldInstance = self.data.acquireInstance(instanceId)
+ except TashiException, e:
+ # shouldn't have a lock to clean up after here
+ if (e.errno == Errors.NoSuchInstanceId):
+ self.log.warning('Got vmUpdate for unknown instanceId %d' % (instanceId))
+ return
+ except:
+ self.log.exception("Could not acquire instance")
+ raise
+
+ import copy
+ displayInstance = copy.copy(oldInstance)
+ displayInstance.state = instance.state
+ self.__ACCOUNT("CM VM UPDATE", instance=displayInstance)
+
+ rv = self.__vmUpdate(oldInstance, instance, oldState)
+
+ if (rv == "release"):
+ self.data.releaseInstance(oldInstance)
+
+ if (rv == "remove"):
+ self.data.removeInstance(oldInstance)
+
+ return "success"
+
+ # extern
def activateVm(self, instanceId, host):
+ # XXXstroucki: check my idea of the host's capacity before
+ # trying.
+
dataHost = self.data.acquireHost(host.id)
if (dataHost.name != host.name):
@@ -632,7 +771,7 @@
self.__ACCOUNT("CM VM ACTIVATE", instance=instance)
if ('__resume_source' in instance.hints):
- self.__stateTransition(instance, InstanceState.Pending, InstanceState.Resuming)
+ self.__stateTransition(instance, None, InstanceState.Resuming)
else:
# XXXstroucki should held VMs be continually tried? Or be explicitly set back to pending?
#self.__stateTransition(instance, InstanceState.Pending, InstanceState.Activating)
@@ -678,12 +817,13 @@
self.data.releaseInstance(instance)
return "success"
- def registerHost(self, hostname, memory, cores, version):
- hostId, alreadyRegistered = self.data.registerHost(hostname, memory, cores, version)
- if alreadyRegistered:
- self.log.info("Host %s is already registered, it was updated now" % hostname)
- else:
- self.log.info("A host was registered - hostname: %s, version: %s, memory: %s, cores: %s" % (hostname, version, memory, cores))
+ # extern
+ def registerHost(self, hostname, memory, cores, version):
+ hostId, alreadyRegistered = self.data.registerHost(hostname, memory, cores, version)
+ if alreadyRegistered:
+ self.log.info("Host %s is already registered, it was updated now" % hostname)
+ else:
+ self.log.info("A host was registered - hostname: %s, version: %s, memory: %s, cores: %s" % (hostname, version, memory, cores))
try:
host = self.data.getHost(hostId)
@@ -691,9 +831,10 @@
except:
self.log.warning("Failed to lookup host %s" % hostId)
- return hostId
+ return hostId
- def unregisterHost(self, hostId):
+ # extern
+ def unregisterHost(self, hostId):
try:
host = self.data.getHost(hostId)
self.__ACCOUNT("CM HOST UNREGISTER", host=host)
@@ -701,9 +842,9 @@
self.log.warning("Failed to lookup host %s" % hostId)
return
- self.data.unregisterHost(hostId)
- self.log.info("Host %s was unregistered" % hostId)
- return
+ self.data.unregisterHost(hostId)
+ self.log.info("Host %s was unregistered" % hostId)
+ return
# service thread
def __monitorCluster(self):
diff --git a/src/tashi/clustermanager/data/datainterface.py b/src/tashi/clustermanager/data/datainterface.py
index e58fb6d..a22297e 100644
--- a/src/tashi/clustermanager/data/datainterface.py
+++ b/src/tashi/clustermanager/data/datainterface.py
@@ -43,25 +43,28 @@
def getHosts(self):
raise NotImplementedError
- def getHost(self, id):
+ def getHost(self, _id):
+ raise NotImplementedError
+
+ def getImages(self):
raise NotImplementedError
def getInstances(self):
raise NotImplementedError
- def getInstance(self, id):
+ def getInstance(self, _id):
raise NotImplementedError
def getNetworks(self):
raise NotImplementedError
- def getNetwork(self, id):
+ def getNetwork(self, _id):
raise NotImplementedError
def getUsers(self):
raise NotImplementedError
- def getUser(self, id):
+ def getUser(self, _id):
raise NotImplementedError
def registerHost(self, hostname, memory, cores, version):
diff --git a/src/tashi/clustermanager/data/fromconfig.py b/src/tashi/clustermanager/data/fromconfig.py
index 8511a07..68465fa 100644
--- a/src/tashi/clustermanager/data/fromconfig.py
+++ b/src/tashi/clustermanager/data/fromconfig.py
@@ -15,17 +15,21 @@
# specific language governing permissions and limitations
# under the License.
+#XXXstroucki: for compatibility with python 2.5
from __future__ import with_statement
+
+import logging
import threading
import os
import ConfigParser
-from tashi.rpycservices.rpyctypes import Host, Network, User, TashiException, Errors, HostState
+from tashi.rpycservices.rpyctypes import Host, Network, User, TashiException, Errors, HostState, Instance
from tashi.clustermanager.data import DataInterface
class FromConfig(DataInterface):
def __init__(self, config):
DataInterface.__init__(self, config)
+ self.log = logging.getLogger(__name__)
self.hosts = {}
self.instances = {}
self.networks = {}
@@ -78,6 +82,10 @@
return instanceId
def registerInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
self.acquireLock(self.instanceLock)
try:
if (instance.id is not None and instance.id not in self.instances):
@@ -107,6 +115,10 @@
return instance
def releaseInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
try:
if (instance.id not in self.instances): # MPR: should never be true, but good to check
raise TashiException(d={'errno':Errors.NoSuchInstanceId,'msg':"No such instanceId - %d" % (instance.id)})
@@ -114,6 +126,10 @@
self.releaseLock(instance._lock)
def removeInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
self.acquireLock(self.instanceLock)
try:
del self.instances[instance.id]
@@ -122,6 +138,10 @@
self.releaseLock(self.instanceLock)
def acquireHost(self, hostId):
+ if type(hostId) is not int:
+ self.log.exception("Argument is not of type int, but of type %s" % (type(hostId)))
+ raise TypeError
+
self.hostLock.acquire()
host = self.hosts.get(hostId, None)
if (host is None):
@@ -134,6 +154,10 @@
def releaseHost(self, host):
+ if type(host) is not Host:
+ self.log.exception("Argument is not of type Host, but of type %s" % (type(host)))
+ raise TypeError
+
try:
if (host.id not in self.hosts): # MPR: should never be true, but good to check
raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (host.id)})
@@ -145,47 +169,47 @@
def getHosts(self):
return self.hosts
- def getHost(self, id):
- host = self.hosts.get(id, None)
+ def getHost(self, _id):
+ host = self.hosts.get(_id, None)
if (not host):
- raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (id)})
+ raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (_id)})
return host
def getInstances(self):
return self.instances
- def getInstance(self, id):
- instance = self.instances.get(id, None)
+ def getInstance(self, _id):
+ instance = self.instances.get(_id, None)
if (not instance):
- raise TashiException(d={'errno':Errors.NoSuchInstanceId,'msg':"No such instanceId - %d" % (id)})
+ raise TashiException(d={'errno':Errors.NoSuchInstanceId,'msg':"No such instanceId - %d" % (_id)})
return instance
def getNetworks(self):
return self.networks
- def getNetwork(self, id):
- return self.networks[id]
+ def getNetwork(self, _id):
+ return self.networks[_id]
def getUsers(self):
return self.users
- def getUser(self, id):
- return self.users[id]
+ def getUser(self, _id):
+ return self.users[_id]
def registerHost(self, hostname, memory, cores, version):
self.hostLock.acquire()
- for id in self.hosts.keys():
- if self.hosts[id].name == hostname:
- host = Host(d={'id':id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
- self.hosts[id] = host
+ for _id in self.hosts.keys():
+ if self.hosts[_id].name == hostname:
+ host = Host(d={'id':_id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
+ self.hosts[_id] = host
self.save()
self.hostLock.release()
- return id, True
- id = self.getNewId("hosts")
- self.hosts[id] = Host(d={'id':id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
+ return _id, True
+ _id = self.getNewId("hosts")
+ self.hosts[_id] = Host(d={'id':_id,'name':hostname,'state':HostState.Normal,'memory':memory,'cores':cores,'version':version})
self.save()
self.hostLock.release()
- return id, False
+ return _id, False
def unregisterHost(self, hostId):
self.hostLock.acquire()
@@ -200,10 +224,10 @@
maxId = 0
l = []
if(table == "hosts"):
- for id in self.hosts.keys():
- l.append(id)
- if id >= maxId:
- maxId = id
+ for _id in self.hosts.keys():
+ l.append(_id)
+ if _id >= maxId:
+ maxId = _id
l.sort() # sort to enable comparing with range output
# check if some id is released:
t = range(maxId + 1)
@@ -221,9 +245,9 @@
# and in what order does it get loaded
fileName = "./etc/Tashi.cfg"
if not os.path.exists(fileName):
- file = open(fileName, "w")
- file.write("[FromConfig]")
- file.close()
+ filehandle = open(fileName, "w")
+ filehandle.write("[FromConfig]")
+ filehandle.close()
parser = ConfigParser.ConfigParser()
parser.read(fileName)
@@ -231,7 +255,7 @@
parser.add_section("FromConfig")
hostsInFile = []
- for (name, value) in parser.items("FromConfig"):
+ for (name, __value) in parser.items("FromConfig"):
name = name.lower()
if (name.startswith("host")):
hostsInFile.append(name)
diff --git a/src/tashi/clustermanager/data/getentoverride.py b/src/tashi/clustermanager/data/getentoverride.py
index 21b2f8f..2cd5e69 100644
--- a/src/tashi/clustermanager/data/getentoverride.py
+++ b/src/tashi/clustermanager/data/getentoverride.py
@@ -15,16 +15,18 @@
# specific language governing permissions and limitations
# under the License.
+import logging
import subprocess
import time
import os
-from tashi.rpycservices.rpyctypes import User, LocalImages
+from tashi.rpycservices.rpyctypes import User, LocalImages, Instance, Host
from tashi.clustermanager.data import DataInterface
from tashi.util import instantiateImplementation, humanReadable
class GetentOverride(DataInterface):
def __init__(self, config):
DataInterface.__init__(self, config)
+ self.log = logging.getLogger(__name__)
self.baseDataObject = instantiateImplementation(config.get("GetentOverride", "baseData"), config)
self.dfs = instantiateImplementation(config.get("ClusterManager", "dfs"), config)
@@ -33,40 +35,60 @@
self.fetchThreshold = float(config.get("GetentOverride", "fetchThreshold"))
def registerInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
return self.baseDataObject.registerInstance(instance)
def acquireInstance(self, instanceId):
return self.baseDataObject.acquireInstance(instanceId)
def releaseInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
return self.baseDataObject.releaseInstance(instance)
def removeInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
return self.baseDataObject.removeInstance(instance)
def acquireHost(self, hostId):
+ if type(hostId) is not int:
+ self.log.exception("Argument is not of type int, but of type %s" % (type(hostId)))
+ raise TypeError
+
return self.baseDataObject.acquireHost(hostId)
def releaseHost(self, host):
+ if type(host) is not Host:
+ self.log.exception("Argument is not of type Host, but of type %s" % (type(host)))
+ raise TypeError
+
return self.baseDataObject.releaseHost(host)
def getHosts(self):
return self.baseDataObject.getHosts()
- def getHost(self, id):
- return self.baseDataObject.getHost(id)
+ def getHost(self, _id):
+ return self.baseDataObject.getHost(_id)
def getInstances(self):
return self.baseDataObject.getInstances()
- def getInstance(self, id):
- return self.baseDataObject.getInstance(id)
+ def getInstance(self, _id):
+ return self.baseDataObject.getInstance(_id)
def getNetworks(self):
return self.baseDataObject.getNetworks()
- def getNetwork(self, id):
- return self.baseDataObject.getNetwork(id)
+ def getNetwork(self, _id):
+ return self.baseDataObject.getNetwork(_id)
def getImages(self):
count = 0
@@ -87,12 +109,12 @@
try:
for l in p.stdout.xreadlines():
ws = l.strip().split(":")
- id = int(ws[2])
+ _id = int(ws[2])
name = ws[0]
user = User()
- user.id = id
+ user.id = _id
user.name = name
- myUsers[id] = user
+ myUsers[_id] = user
self.users = myUsers
self.lastUserUpdate = now
finally:
@@ -102,9 +124,9 @@
self.fetchFromGetent()
return self.users
- def getUser(self, id):
+ def getUser(self, _id):
self.fetchFromGetent()
- return self.users[id]
+ return self.users[_id]
def registerHost(self, hostname, memory, cores, version):
return self.baseDataObject.registerHost(hostname, memory, cores, version)
diff --git a/src/tashi/clustermanager/data/ldapoverride.py b/src/tashi/clustermanager/data/ldapoverride.py
index 0236635..56b154c 100644
--- a/src/tashi/clustermanager/data/ldapoverride.py
+++ b/src/tashi/clustermanager/data/ldapoverride.py
@@ -17,9 +17,11 @@
import subprocess
import time
-from tashi.rpycservices.rpyctypes import User
+#XXXstroucki getImages requires os?
+import os
+from tashi.rpycservices.rpyctypes import User, LocalImages
+from tashi.util import instantiateImplementation, humanReadable
from tashi.clustermanager.data import DataInterface
-from tashi.util import instantiateImplementation
class LdapOverride(DataInterface):
def __init__(self, config):
@@ -31,6 +33,7 @@
self.nameKey = config.get("LdapOverride", "nameKey")
self.idKey = config.get("LdapOverride", "idKey")
self.ldapCommand = config.get("LdapOverride", "ldapCommand")
+ self.dfs = instantiateImplementation(config.get("ClusterManager", "dfs"), config)
def registerInstance(self, instance):
return self.baseDataObject.registerInstance(instance)
@@ -53,20 +56,31 @@
def getHosts(self):
return self.baseDataObject.getHosts()
- def getHost(self, id):
- return self.baseDataObject.getHost(id)
+ def getHost(self, _id):
+ return self.baseDataObject.getHost(_id)
def getInstances(self):
return self.baseDataObject.getInstances()
- def getInstance(self, id):
- return self.baseDataObject.getInstance(id)
+ def getInstance(self, _id):
+ return self.baseDataObject.getInstance(_id)
def getNetworks(self):
return self.baseDataObject.getNetworks()
- def getNetwork(self, id):
- return self.baseDataObject.getNetwork(id)
+ def getNetwork(self, _id):
+ return self.baseDataObject.getNetwork(_id)
+
+ def getImages(self):
+ count = 0
+ myList = []
+ for i in self.dfs.list("images"):
+ myFile = self.dfs.getLocalHandle("images/" + i)
+ if os.path.isfile(myFile):
+ image = LocalImages(d={'id':count, 'imageName':i, 'imageSize':humanReadable(self.dfs.stat(myFile)[6])})
+ myList.append(image)
+ count += 1
+ return myList
def fetchFromLdap(self):
now = time.time()
@@ -86,7 +100,7 @@
myUsers[user.id] = user
thisUser = {}
else:
- (key, sep, val) = l.partition(":")
+ (key, __sep, val) = l.partition(":")
key = key.strip()
val = val.strip()
thisUser[key] = val
@@ -101,9 +115,9 @@
self.fetchFromLdap()
return self.users
- def getUser(self, id):
+ def getUser(self, _id):
self.fetchFromLdap()
- return self.users[id]
+ return self.users[_id]
def registerHost(self, hostname, memory, cores, version):
return self.baseDataObject.registerHost(hostname, memory, cores, version)
diff --git a/src/tashi/clustermanager/data/pickled.py b/src/tashi/clustermanager/data/pickled.py
index b3a6e03..043d756 100644
--- a/src/tashi/clustermanager/data/pickled.py
+++ b/src/tashi/clustermanager/data/pickled.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import logging
import cPickle
import os
import threading
@@ -24,6 +25,7 @@
class Pickled(FromConfig):
def __init__(self, config):
DataInterface.__init__(self, config)
+ self.log = logging.getLogger(__name__)
self.file = self.config.get("Pickled", "file")
self.locks = {}
self.lockNames = {}
@@ -39,39 +41,51 @@
def cleanInstances(self):
ci = {}
- for i in self.instances.itervalues():
+ for __ignore, i in self.instances.items():
i2 = Instance(d=i.__dict__)
ci[i2.id] = i2
return ci
def cleanHosts(self):
ch = {}
- for h in self.hosts.itervalues():
+ for __ignore, h in self.hosts.items():
h2 = Host(d=h.__dict__)
ch[h2.id] = h2
return ch
def save(self):
- file = open(self.file, "w")
- cPickle.dump((self.cleanHosts(), self.cleanInstances(), self.networks, self.users), file)
- file.close()
+ # XXXstroucki lock here to serialize saves
+ filename = self.file
+ # XXXstroucki could be better
+ tempfile = "%s.new" % filename
+
+ filehandle = open(tempfile, "w")
+ cPickle.dump((self.cleanHosts(), self.cleanInstances(), self.networks, self.users), filehandle)
+ filehandle.close()
+ try:
+ os.rename(tempfile, filename)
+ except OSError:
+ # XXXstroucki: regular save will take place
+ # soon enough, ignore this until locking is
+ # in place.
+ pass
def load(self):
if (os.access(self.file, os.F_OK)):
- file = open(self.file, "r")
- (hosts, instances, networks, users) = cPickle.load(file)
- file.close()
+ filehandle = open(self.file, "r")
+ (hosts, instances, networks, users) = cPickle.load(filehandle)
+ filehandle.close()
else:
(hosts, instances, networks, users) = ({}, {}, {}, {})
self.hosts = hosts
self.instances = instances
self.networks = networks
self.users = users
- for i in self.instances.itervalues():
+ for __ignore, i in self.instances.items():
if (i.id >= self.maxInstanceId):
self.maxInstanceId = i.id + 1
i._lock = threading.Lock()
self.lockNames[i._lock] = "i%d" % (i.id)
- for h in self.hosts.itervalues():
+ for __ignore, h in self.hosts.items():
h._lock = threading.Lock()
self.lockNames[h._lock] = "h%d" % (h.id)
diff --git a/src/tashi/clustermanager/data/sql.py b/src/tashi/clustermanager/data/sql.py
index 64e5681..bfa0273 100644
--- a/src/tashi/clustermanager/data/sql.py
+++ b/src/tashi/clustermanager/data/sql.py
@@ -130,6 +130,10 @@
return h
def registerInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
self.instanceLock.acquire()
try:
if (instance.id is not None and instance.id not in self.getInstances()):
@@ -173,6 +177,10 @@
return instance
def releaseInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
self.instanceLock.acquire()
try:
l = self.makeInstanceList(instance)
@@ -191,6 +199,10 @@
self.instanceLock.release()
def removeInstance(self, instance):
+ if type(instance) is not Instance:
+ self.log.exception("Argument is not of type Instance, but of type %s" % (type(instance)))
+ raise TypeError
+
self.instanceLock.acquire()
try:
self.executeStatement("DELETE FROM instances WHERE id = %d" % (instance.id))
@@ -205,6 +217,10 @@
self.instanceLock.release()
def acquireHost(self, hostId):
+ if type(hostId) is not int:
+ self.log.exception("Argument is not of type int, but of type %s" % (type(hostId)))
+ raise TypeError
+
host = self.getHost(hostId)
self.hostLock.acquire()
self.hostLocks[host.id] = self.hostLocks.get(host.id, threading.Lock())
@@ -214,6 +230,10 @@
return host
def releaseHost(self, host):
+ if type(host) is not Host:
+ self.log.exception("Argument is not of type Host, but of type %s" % (type(host)))
+ raise TypeError
+
l = self.makeHostList(host)
s = ""
for e in range(0, len(self.hostOrder)):
@@ -234,14 +254,14 @@
def getHost(self, in_id):
try:
- id = int(in_id)
+ _id = int(in_id)
except:
self.log.exception("Argument to getHost was not integer: %s" % in_id)
- cur = self.executeStatement("SELECT * FROM hosts WHERE id = %d" % id)
+ cur = self.executeStatement("SELECT * FROM hosts WHERE id = %d" % _id)
r = cur.fetchone()
if (r == None):
- raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (id)})
+ raise TashiException(d={'errno':Errors.NoSuchHostId,'msg':"No such hostId - %s" % (_id)})
host = self.makeListHost(r)
return host
@@ -256,16 +276,16 @@
def getInstance(self, in_id):
try:
- id = int(in_id)
+ _id = int(in_id)
except:
self.log.exception("Argument to getInstance was not integer: %s" % in_id)
- cur = self.executeStatement("SELECT * FROM instances WHERE id = %d" % (id))
+ cur = self.executeStatement("SELECT * FROM instances WHERE id = %d" % (_id))
# XXXstroucki should only return one row.
# what about migration? should it be enforced?
r = cur.fetchone()
if (not r):
- raise TashiException(d={'errno':Errors.NoSuchInstanceId, 'msg':"No such instanceId - %d" % (id)})
+ raise TashiException(d={'errno':Errors.NoSuchInstanceId, 'msg':"No such instanceId - %d" % (_id)})
instance = self.makeListInstance(r)
return instance
@@ -278,22 +298,23 @@
networks[network.id] = network
return networks
- def getNetwork(self, id):
- cur = self.executeStatement("SELECT * FROM networks WHERE id = %d" % (id))
+ def getNetwork(self, _id):
+ cur = self.executeStatement("SELECT * FROM networks WHERE id = %d" % (_id))
r = cur.fetchone()
network = Network(d={'id':r[0], 'name':r[1]})
return network
- def getImages(self):
- count = 0
- myList = []
- for i in self.dfs.list("images"):
- myFile = self.dfs.getLocalHandle("images/" + i)
- if os.path.isfile(myFile):
- image = LocalImages(d={'id':count, 'imageName':i, 'imageSize':humanReadable(self.dfs.stat(myFile)[6])})
- myList.append(image)
- count += 1
- return myList
+ def getImages(self):
+ count = 0
+ myList = []
+ for i in self.dfs.list("images"):
+ myFile = self.dfs.getLocalHandle("images/" + i)
+ if os.path.isfile(myFile):
+ image = LocalImages(d={'id':count, 'imageName':i, 'imageSize':humanReadable(self.dfs.stat(myFile)[6])})
+ myList.append(image)
+ count += 1
+
+ return myList
def getUsers(self):
cur = self.executeStatement("SELECT * from users")
@@ -304,8 +325,8 @@
users[user.id] = user
return users
- def getUser(self, id):
- cur = self.executeStatement("SELECT * FROM users WHERE id = %d" % (id))
+ def getUser(self, _id):
+ cur = self.executeStatement("SELECT * FROM users WHERE id = %d" % (_id))
r = cur.fetchone()
user = User(d={'id':r[0], 'name':r[1], 'passwd':r[2]})
return user
@@ -316,20 +337,20 @@
res = cur.fetchall()
for r in res:
if r[1] == hostname:
- id = r[0]
- self.log.warning("Host %s already registered, update will be done" % id)
+ _id = r[0]
+ self.log.warning("Host %s already registered, update will be done" % _id)
s = ""
- host = Host(d={'id': id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
+ host = Host(d={'id': _id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
l = self.makeHostList(host)
for e in range(0, len(self.hostOrder)):
s = s + self.hostOrder[e] + "=" + l[e]
if (e < len(self.hostOrder)-1):
s = s + ", "
- self.executeStatement("UPDATE hosts SET %s WHERE id = %d" % (s, id))
+ self.executeStatement("UPDATE hosts SET %s WHERE id = %d" % (s, _id))
self.hostLock.release()
return r[0], True
- id = self.getNewId("hosts")
- host = Host(d={'id': id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
+ _id = self.getNewId("hosts")
+ host = Host(d={'id': _id, 'up': 0, 'decayed': 0, 'state': 1, 'name': hostname, 'memory':memory, 'cores': cores, 'version':version})
l = self.makeHostList(host)
self.executeStatement("INSERT INTO hosts VALUES (%s, %s, %s, %s, %s, %s, %s, %s)" % tuple(l))
self.hostLock.release()
@@ -353,10 +374,10 @@
maxId = 0 # the first id would be 1
l = []
for r in res:
- id = r[0]
- l.append(id)
- if id >= maxId:
- maxId = id
+ _id = r[0]
+ l.append(_id)
+ if _id >= maxId:
+ maxId = _id
l.sort() # sort to enable comparing with range output
# check if some id is released:
t = range(maxId + 1)
diff --git a/src/tashi/connectionmanager.py b/src/tashi/connectionmanager.py
index 5eeae6c..c9026dc 100644
--- a/src/tashi/connectionmanager.py
+++ b/src/tashi/connectionmanager.py
@@ -15,15 +15,16 @@
# specific language governing permissions and limitations
# under the License.
-from tashi.rpycservices import rpycservices
+from tashi import Connection
#from tashi.rpycservices.rpyctypes import *
class ConnectionManager(object):
- def __init__(self, username, password, port, timeout=10000.0):
+ def __init__(self, username, password, port, timeout=10000.0, authAndEncrypt=False):
self.username = username
self.password = password
self.timeout = timeout
self.port = port
+ self.authAndEncrypt = authAndEncrypt
def __getitem__(self, hostname):
port = self.port
@@ -31,4 +32,4 @@
port = hostname[1]
hostname = hostname[0]
- return rpycservices.client(hostname, port, username=self.username, password=self.password)
+ return Connection(hostname, port, credentials=(self.username, self.password), authAndEncrypt=self.authAndEncrypt)
diff --git a/src/tashi/dfs/vfs.py b/src/tashi/dfs/vfs.py
index d039335..650a805 100644
--- a/src/tashi/dfs/vfs.py
+++ b/src/tashi/dfs/vfs.py
@@ -18,7 +18,6 @@
# implementation of dfs interface functions
import shutil
-import os
import os.path
from dfsinterface import DfsInterface
diff --git a/src/tashi/messaging/gangliapublisher.py b/src/tashi/messaging/gangliapublisher.py
index e589162..2d27947 100644
--- a/src/tashi/messaging/gangliapublisher.py
+++ b/src/tashi/messaging/gangliapublisher.py
@@ -17,7 +17,6 @@
import os
import time
-import types
from tashi import scrubString
diff --git a/src/tashi/messaging/messageBroker.py b/src/tashi/messaging/messageBroker.py
deleted file mode 100644
index c21b57a..0000000
--- a/src/tashi/messaging/messageBroker.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import ConfigParser
-import getopt
-
-import os
-import sys
-import time
-
-import thriftmessaging
-
-options = []
-long_options = ['port=']
-
-# FIXME: should initialize from config file
-params = {"port":1717}
-
-try:
- optlist, args = getopt.getopt(sys.argv[1:], options, long_options)
-except getopt.GetoptError, err:
- print str(err)
- sys.exit(2)
-
-for opt in optlist:
- if opt[0] == "--port":
- try:
- params["port"] = int(opt[1])
- except:
- print "--port expects an integer, got %s" % opt[1]
- sys.exit(0)
-
-print "Starting message broker on port %i" % params["port"]
-broker = thriftmessaging.MessageBrokerThrift(params["port"], daemon=False)
-
diff --git a/src/tashi/messaging/messaging.py b/src/tashi/messaging/messaging.py
deleted file mode 100644
index c421d5c..0000000
--- a/src/tashi/messaging/messaging.py
+++ /dev/null
@@ -1,337 +0,0 @@
-#!/usr/bin/python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import threading
-import thread
-import sys
-import os
-import socket
-import Queue
-import copy
-import random
-import traceback
-
-from threadpool import ThreadPoolClass, threadpool, ThreadPool
-from threadpool import threadpoolmethod, threaded, synchronized, synchronizedmethod
-
-class RWLock(object):
- """RWLock: Simple reader/writer lock implementation
- FIXME: this implementation will starve writers!
- Methods:
- acquire() : take lock for read access
- release() : release lock from read access
- acquireWrite() : take lock for write access
- releaseWrite() : release lock from write access"""
- def __init__(self):
- self.lock = threading.Condition()
- self.readers = 0
- def acquire(self):
- self.lock.acquire()
- self.readers = self.readers + 1
- self.lock.release()
- def release(self):
- self.lock.acquire()
- self.readers = self.readers - 1
- self.lock.notify()
- self.lock.release()
- def acquireWrite(self):
- self.lock.acquire()
- while self.readers > 0:
- self.lock.wait()
- def releaseWrite(self):
- self.lock.notify()
- self.lock.release()
-
-
-
-class MessageBroker(object):
- def __init__(self):
- self.sublock = RWLock()
- self.subscribers = []
- self.random = random.Random()
- def log(self, msg):
- print "MessageBroker: Got log: '%s'" % str(msg)
- return msg
- def addSubscriber(self, subscriber):
- self.sublock.acquireWrite()
- self.subscribers.append(subscriber)
- l = len(self.subscribers)
- self.sublock.releaseWrite()
- return l
- def publish(self, message):
- removesubs = []
- i = self.random.randint(0,100)
-
-# subscribers = self.getSubscribers()
-# random.shuffle(subscribers)
-
- self.sublock.acquire()
-
- sys.stdout.flush()
-
- for subscriber in self.subscribers:
- try:
- sys.stdout.flush()
- assert(subscriber != self)
- subscriber.publish(message)
- sys.stdout.flush()
- except Exception, e:
- print e
- removesubs.append(subscriber)
-
- self.sublock.release()
-
- if len(removesubs) > 0:
- print "detected %i failed subscribers" % len(removesubs)
- sys.stdout.flush()
- self.sublock.acquireWrite()
- for subscriber in removesubs:
- try:
- self.subscribers.remove(subscriber)
- except:
- pass
- self.sublock.releaseWrite()
- def getSubscribers(self):
- self.sublock.acquire()
- subs = copy.copy(self.subscribers)
- self.sublock.release()
- return subs
- def removeSubscriber(self, subscriber):
- self.sublock.acquireWrite()
- try:
- self.subscribers.remove(subscriber)
- except:
- pass
- self.sublock.releaseWrite()
- def publishList(self, messages):
- for message in messages:
- self.publish(message)
-
-class Subscriber(object):
- def __init__(self, broker, pmatch={}, nmatch={}, synchronized=False):
- self.broker = broker
- self.lock = threading.Lock()
- self.synchronized = synchronized
- self.pmatch={}
- self.nmatch={}
- broker.addSubscriber(self)
- def publish(self, message):
- sys.stdout.flush()
- msg = message
- try:
- if self.synchronized:
- self.lock.acquire()
- msg = self.filter(msg)
- if (msg != None):
- self.handle(msg)
- if self.synchronized:
- self.lock.release()
- except Exception, x:
- if self.synchronized:
- self.lock.release()
- print '%s, %s, %s' % (type(x), x, traceback.format_exc())
- def publishList(self, messages):
- for message in messages:
- self.publish(message)
- def handle(self, message):
- print "Subscriber Default Handler: '%s'" % message
- def setMatch(self, pmatch={}, nmatch={}):
- self.lock.acquire()
- self.pmatch=pmatch
- self.nmatch=nmatch
- self.lock.release()
- def filter(self, message):
- """filter(self, message) : the filter function returns
- the message, modified to be passed to the handler.
- Returning (None) indicates that this is not a message
- we are interested in, and it will not be passed to the
- handler."""
- send = True
- for key in self.pmatch.keys():
- if (not message.has_key(key)):
- send = False
- break
- if self.pmatch[key] != None:
- if message[key] != self.pmatch[key]:
- send = False
- break
- if send == False:
- return None
- for key in message.keys():
- if self.nmatch.has_key(key):
- if self.nmatch[key] == None:
- send = False
- break
- if self.nmatch[key] == message[key]:
- send = False
- break
- if send == False:
- return None
- return message
-
-
-
-class Publisher(object):
- '''Superclass for pub/sub publishers
-
- FIXME: use finer-grained locking'''
- def __init__(self, broker, aggregate=100):
- self.pending = []
- self.pendingLock = threading.Lock()
- self.aggregateSize = aggregate
- self.broker = broker
- @synchronizedmethod
- def publish(self, message):
- if message.has_key('aggregate') and message['aggregate'] == 'True':
- self.aggregate(message)
- return
- else:
- self.broker.publish(message)
- @synchronizedmethod
- def publishList(self, messages):
- self.broker.publishList(messages)
- @synchronizedmethod
- def aggregate(self, message):
- # we can make this lock-less by using a queue for pending
- # messages
- self.pendingLock.acquire()
- self.pending.append(message)
- if len(self.pending) >= self.aggregateSize:
- self.broker.publishList(self.pending)
- self.pending = []
- self.pendingLock.release()
- @synchronizedmethod
- def setBroker(self, broker):
- self.broker = broker
-
-##############################
-# Testing Code
-##############################
-import time
-import unittest
-import sys
-import logging
-
-
-class TestSubscriber(Subscriber):
- def __init__(self, *args, **kwargs):
- self.queue = Queue.Queue()
- Subscriber.__init__(self, *args, **kwargs)
- def handle(self, message):
- self.queue.put(message)
-
-class TestMessaging(unittest.TestCase):
- def setUp(self):
- self.broker = MessageBroker()
- self.publisher = Publisher(self.broker)
- self.subscriber = TestSubscriber(self.broker)
- def testPublish(self):
- self.publisher.publish( {'message':'hello world'} )
- self.assertEqual(self.subscriber.queue.qsize(), 1)
- def testPublishList(self):
- nrmsgs = 10
- msgs = []
- for i in range(nrmsgs):
- msgs.append( {'msgnum':str(i)} )
- self.publisher.publishList( msgs )
- self.assertEqual(self.subscriber.queue.qsize(), nrmsgs)
- def testAggregate(self):
- nrmsgs = self.publisher.aggregateSize
- for i in range(nrmsgs):
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- self.publisher.aggregate( {'msgnum':str(i)} )
- self.assertEqual(self.subscriber.queue.qsize(), nrmsgs)
- def testAggregateKeyword(self):
- nrmsgs = self.publisher.aggregateSize
- for i in range(nrmsgs):
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- self.publisher.publish( {'msgnum':str(i), 'aggregate':'True'} )
- self.assertEqual(self.subscriber.queue.qsize(), nrmsgs)
-
-if __name__ == '__main__':
-
- logging.basicConfig(level=logging.INFO,
- format="%(asctime)s %(levelname)s:\t %(message)s",
- stream=sys.stdout)
-
- suite = unittest.TestLoader().loadTestsFromTestCase(TestMessaging)
- unittest.TextTestRunner(verbosity=2).run(suite)
-
- sys.exit(0)
-
-
-##############################
-# Old/Unused testing code
-##############################
-
-
-
- print 'testing removeSubscriber'
- broker.removeSubscriber(subscriber)
- publisher.publish( {'message':"you shouldn't see this"} )
-
- nsub = NullSubscriber(broker)
- print 'timing publish'
- nrmsg = 100000
- tt = time.time()
- for i in range(nrmsg):
-# publisher.publish( {"message":"hello world!"} )
- publisher.publish( {} )
- tt = time.time() - tt
- print "Published %i messages in %f seconds, %f msg/s"%(nrmsg,
- tt,
- nrmsg/tt)
- broker.removeSubscriber(nsub)
-
- class SlowSubscriber(Subscriber):
- def handle(self, message):
- print 'called slow subscriber with message', message
- time.sleep(1)
- print 'returning from slow subscriber with message', message
- class ThreadedSubscriber(Subscriber):
- @threaded
- def handle(self, message):
- print 'called threaded subscriber with message', message
- time.sleep(1)
- print 'returning from threaded subscriber with message', message
- class ThreadPoolSubscriber(Subscriber, ThreadPoolClass):
- @threadpoolmethod
- def handle(self, message):
- print 'called threadpool subscriber with message', message
- time.sleep(1)
- print 'returning from threadpool subscriber with message', message
-
-
-
- tsub = ThreadedSubscriber(broker)
- for i in range(8):
- publisher.publish( {"msg":str(i)} )
- broker.removeSubscriber(tsub)
- time.sleep(3)
-
- tpsub = ThreadPoolSubscriber(broker)
- for i in range(8):
- publisher.publish( {"msg":str(i)} )
- broker.removeSubscriber(tpsub)
- time.sleep(3)
-
- ssub = SlowSubscriber(broker)
- for i in range(4):
- publisher.publish( {"msg":str(i)} )
- broker.removeSubscriber(ssub)
diff --git a/src/tashi/messaging/messagingloghandler.py b/src/tashi/messaging/messagingloghandler.py
index fd503a1..b757894 100644
--- a/src/tashi/messaging/messagingloghandler.py
+++ b/src/tashi/messaging/messagingloghandler.py
@@ -34,7 +34,9 @@
try:
key = "log_%s_%d_%d" % (self.name, self.msgIndex, int(time.time()*1000))
val = self.format(record)
- tashi.publisher.publish({key:val})
+ #XXXstroucki publisher does not exist
+ (_,_) = (key,val)
+ #tashi.publisher.publish({key:val})
self.msgIndex = self.msgIndex + 1
except Exception, e:
print e
diff --git a/src/tashi/messaging/soapmessaging.py b/src/tashi/messaging/soapmessaging.py
deleted file mode 100755
index be35fc9..0000000
--- a/src/tashi/messaging/soapmessaging.py
+++ /dev/null
@@ -1,229 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from messaging import *
-
-import cPickle
-import soaplib.wsgi_soap
-import cherrypy.wsgiserver
-from soaplib.service import soapmethod
-from soaplib.serializers.primitive import *
-import SOAPpy.WSDL
-import time
-
-class MessageBrokerSoap(soaplib.wsgi_soap.SimpleWSGISoapApp, MessageBroker):
- def __init__(self, port):
- soaplib.wsgi_soap.SimpleWSGISoapApp.__init__(self)
- MessageBroker.__init__(self)
- self.port = port
- def trdfn():
- service = self
- server = cherrypy.wsgiserver.CherryPyWSGIServer(("0.0.0.0",port), service)
- server.start()
- threading.Thread(target=trdfn).start()
-
-
- @soapmethod(Array(String), Array(String), _returns=Null)
- def log(self, keys, values):
- message = {}
- if len(keys) != len(values):
- raise Exception, "Different lengths for keys and values"
- for i in range(len(keys)):
- message[keys[i]] = values[i]
- MessageBroker.log(self, message)
-
- @soapmethod(String, Integer, _returns=Null)
- def addSubscriber(self, host, port):
- subscriber = SubscriberSoapProxy(host, port)
- MessageBroker.addSubscriber(self, subscriber)
-
- @soapmethod(String, Integer, _returns=Null)
- def removeSubscriber(self, host, port):
- # should this method really be able to peek into subscriber.host/port
- subscriber = None
- subscribers = self.getSubscribers()
- for subscriber in subscribers:
- if subscriber.host == host and subscriber.port == port:
- subscriber = subscriber
- if subscriber != None:
- MessageBroker.removeSubscriber(self, subscriber)
-
-
- @soapmethod(Array(String), Array(String), _returns=Null)
- def publish(self, keys, values):
- message = {}
- if len(keys) != len(values):
- raise Exception, "Different lengths for keys and values"
- for i in range(len(keys)):
- message[keys[i]] = values[i]
- MessageBroker.publish(self, message)
-
-
-
-class MessageBrokerSoapProxy(object):
- def __init__(self, host, port):
- self.host = host
- self.port = port
- self.connection = SOAPpy.WSDL.Proxy("http://%s:%i/.wsdl"%(host,port))
- def log(self, message):
- keys = []
- values = []
- for k,v in message.items():
- keys.append(k)
- values.append(v)
- self.connection.log(keys=keys, values=values)
- def addSubscriber(self, subscriber):
- self.connection.addSubscriber(host=subscriber.host, port=subscriber.port)
- def publish(self, message):
- keys = []
- values = []
- for k,v in message.items():
- keys.append(k)
- values.append(v)
- self.connection.publish(keys=keys, values=values)
- def removeSubscriber(self, subscriber):
- self.connection.removeSubscriber(host=subscriber.host, port=subscriber.port)
-
-
-
-
-class SubscriberSoap(soaplib.wsgi_soap.SimpleWSGISoapApp, Subscriber):
- def __init__(self, broker, port, synchronized=False):
- soaplib.wsgi_soap.SimpleWSGISoapApp.__init__(self)
- Subscriber.__init__(self, synchronized=synchronized)
- self.host = socket.gethostname()
- self.port = port
- self.broker = broker
- self.server = None
- def trdfn():
- service = self
- self.server = cherrypy.wsgiserver.CherryPyWSGIServer(("0.0.0.0",port), service)
- self.server.start()
- threading.Thread(target=trdfn).start()
-# broker.log("Subscriber started")
- broker.addSubscriber(self)
- @soapmethod(Array(String), Array(String), _returns=Integer)
- def publish(self, keys, values):
- message = {}
- if len(keys) != len(values):
- raise Exception, "Different lengths for keys and values"
- for i in range(len(keys)):
- message[keys[i]] = values[i]
- Subscriber.publish(self, message)
- return 0
- def stop(self):
- self.server.stop()
-
-class SubscriberSoapProxy(object):
- def __init__(self, host, port):
- self.host = host
- self.port = port
- self.connection = SOAPpy.WSDL.Proxy("http://%s:%i/.wsdl"%(host,port))
- def publish(self, message):
- keys = []
- values = []
- for k,v in message.items():
- keys.append(k)
- values.append(v)
- self.connection.publish(keys=keys, values=values)
-
-
-####################
-# Testing Code
-####################
-
-class CustomSubscriber(SubscriberSoap):
- def handle(self, message):
- print "Custom Subscriber: '%s'" % str(message)
-
-class NullSubscriber(SubscriberSoap):
- def handle(self, message):
- pass
-
-
-if __name__ == '__main__':
- try:
- portnum = 1717
-
- print "\ntesting message broker"
- broker = MessageBrokerSoap(portnum)
- proxy = MessageBrokerSoapProxy("localhost", portnum)
- portnum = portnum + 1
-
- print "\ntesting log function"
- proxy.log( {"message":"Hello World!"} )
-# proxy.log("It looks like log works")
-
- print "\ntesting subscriber proxy"
- subscriber = SubscriberSoap(proxy, portnum)
- portnum = portnum + 1
-
- print "\ntesting custom subscriber"
- csub = CustomSubscriber(proxy, portnum)
- portnum = portnum + 1
-
- print "\ntesting publish"
- proxy.publish( {"message":"Hello World!"} )
-
- print "\ntesting stop"
- subscriber.stop()
- proxy.publish( {"message":"Everybody here?"} )
-
- print "\ntesting removeSubscriber"
- proxy.removeSubscriber(csub)
- proxy.publish( {"message":"Nobody home"} )
- proxy.addSubscriber(csub)
- proxy.publish( {"message":"You're back!"} )
-
- print "\ntesting filter"
- csub.setMatch( {"print":"yes"} )
- proxy.publish( {"print":"yes", "message":"this should be printed"} )
- proxy.publish( {"print":"no", "message":"this should NOT be printed"} )
- csub.setMatch()
-
- print "\ntesting publish performance"
- proxy.removeSubscriber(csub)
- nrmsg = 10000
- tt = time.time()
- for i in range(nrmsg):
- proxy.publish( {"message":"msg %i"%i} )
- tt = time.time() - tt
- print "Published %i messages in %f seconds, %f msg/s"%(nrmsg,
- tt,
- nrmsg/tt)
-
- print "\ntesting publish/subscribe performance"
- nsub = NullSubscriber(proxy, portnum)
- portnum = portnum + 1
- nrmsg = 10000
- tt = time.time()
- for i in range(nrmsg):
- proxy.publish( {"message":"msg %i"%i} )
- tt = time.time() - tt
- print "Published %i messages in %f seconds, %f msg/s"%(nrmsg,
- tt,
- nrmsg/tt)
-
-
-
- except Exception, e:
-# raise e
- print e
- sys.exit(0)
- sys.exit(0)
diff --git a/src/tashi/messaging/tashimessaging.py b/src/tashi/messaging/tashimessaging.py
deleted file mode 100644
index 006400f..0000000
--- a/src/tashi/messaging/tashimessaging.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from thriftmessaging import *
-import logging
-import Queue
-from ConfigParser import ConfigParser
-import time
-import socket
-import signal
-
-class TashiLogHandler(logging.Handler, PublisherThrift):
- def __init__(self, config, *args, **kwargs):
- self.messages = Queue.Queue()
- self.config = config
- logging.Handler.__init__(self, *args, **kwargs)
- PublisherThrift.__init__(self,
- config.get('MessageBroker', 'host'),
- int(config.get('MessageBroker', 'port')))
- def emit(self, record):
- # 'args', 'created', 'exc_info', 'exc_text', 'filename',
- # 'funcName', 'getMessage', 'levelname', 'levelno', 'lineno',
- # 'module', 'msecs', 'msg', 'name', 'pathname', 'process',
- # 'relativeCreated', 'thread', 'threadName']
- msg = {}
- # args
- # created
- # exc_info
- # exc_text
- msg['log-filename'] = str(record.filename)
- msg['log-funcname'] = str(record.funcName)
- msg['log-levelname'] = str(record.levelname)
- msg['log-level'] = str(record.levelno)
- msg['log-lineno'] = str(record.lineno)
- msg['log-module'] = str(record.module)
- msg['log-msecs'] = str(record.msecs)
- msg['log-message'] = str(record.msg)
- msg['log-name'] = str(record.name)
- msg['log-pathname'] = str(record.pathname)
- msg['log-process'] = str(record.process)
- # relativeCreated
- msg['log-thread'] = str(record.thread)
- msg['log-threadname'] = str(record.threadName)
-
- # standard message fields
- msg['timestamp'] = str(time.time())
- msg['hostname'] = socket.gethostname()
- msg['message-type'] = 'log'
-
- self.messages.put(msg)
- self.publish(msg)
-
-class TashiSubscriber(SubscriberThrift):
- def __init__(self, config, port, **kwargs):
- sys.stdout.flush()
- brokerPort = int(config.get('MessageBroker', 'port'))
- self.broker = MessageBrokerThriftProxy(config.get('MessageBroker', 'host'), brokerPort)
- SubscriberThrift.__init__(self, self.broker, port, **kwargs)
-
-
-
-##############################
-# Test Code
-##############################
-import unittest
-import sys
-
-class TestTashiSubscriber(TashiSubscriber):
- def __init__(self, *args, **kwargs):
- self.messageQueue = Queue.Queue()
- TashiSubscriber.__init__(self, *args, **kwargs)
- def handle(self, message):
- self.messageQueue.put(message)
-
-
-def incrementor(start = 0):
- while True:
- a = start
- start = start + 1
- yield a
-increment = incrementor()
-
-class TestTashiMessaging(unittest.TestCase):
- def setUp(self):
- self.configFiles = [ '../../../etc/TestConfig.cfg']
- self.config = ConfigParser()
- self.configFiles = self.config.read(self.configFiles)
- self.port = int(self.config.get('MessageBroker', 'port'))
-
- try:
- self.brokerPid = os.spawnlpe(os.P_NOWAIT, 'python', 'python',
- './messageBroker.py',
- '--port', str(self.port),
- os.environ)
- self.port = self.port + 1
- # FIXME: what's the best way to wait for the broker to be ready?
- time.sleep(1)
- except Exception, e:
- sys.exit(0)
- self.initialized = True
- self.log = logging.getLogger('TestTashiMessaging')
- self.handler = TashiLogHandler(self.config)
- self.log.addHandler(self.handler)
- self.sub = TestTashiSubscriber(self.config, int(self.port) + increment.next())
- def tearDown(self):
- os.kill(self.brokerPid, signal.SIGKILL)
- # FIXME: wait for the port to be ready again
- time.sleep(2)
- self.log.removeHandler(self.handler)
-# self.sub.broker.removeSubscriber(self.sub)
- pass
- def testLog(self):
- self.log.log(50, "Hello World!")
- self.handler.messages.get(timeout=5)
- self.sub.messageQueue.get(timeout=5)
- self.assertEqual(self.handler.messages.qsize(), 0)
- self.assertEqual(self.sub.messageQueue.qsize(), 0)
- def testPublish(self):
- sys.stdout.flush()
- self.port = self.port + 1
- self.handler.publish({'message':'hello world'})
- self.sub.messageQueue.get(timeout=5)
- self.assertEqual(self.sub.messageQueue.qsize(), 0)
-
-
-if __name__=='__main__':
-
-
-# logging.basicConfig(level=logging.INFO,
-# format="%(asctime)s %(levelname)s:\t %(message)s",
-# stream=sys.stdout)
-
- suite = unittest.TestLoader().loadTestsFromTestCase(TestTashiMessaging)
- unittest.TextTestRunner(verbosity=2).run(suite)
diff --git a/src/tashi/messaging/threadpool.py b/src/tashi/messaging/threadpool.py
deleted file mode 100644
index 5684ef2..0000000
--- a/src/tashi/messaging/threadpool.py
+++ /dev/null
@@ -1,305 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import threading
-import time
-import Queue
-import logging
-
-_log = logging.getLogger('tashi.messaging.threadpool')
-
-def threaded(func):
- def fn(*args, **kwargs):
- thread = threading.Thread(target=func, args=args, kwargs=kwargs)
- thread.start()
- return thread
- return fn
-
-
-class ThreadPool(Queue.Queue):
- def __init__(self, size=8, maxsize=0):
- Queue.Queue.__init__(self, maxsize)
- for i in range(size):
- thread = threading.Thread(target=self._worker)
- thread.setDaemon(True)
- thread.start()
- def _worker(self):
- while True:
- try:
- func, args, kwargs = self.get()
- func(*args, **kwargs)
- except Exception, e:
- _log.error(e)
- # FIXME: do something smarter here, backtrace, log,
- # allow user-defined error handling...
-
- def submit(self, func, *args, **kwargs):
- self.put((func, args, kwargs))
- def submitlist(self, func, args, kwargs):
- self.put((func, args, kwargs))
-
-class ThreadPoolClass:
- def __init__(self, size=8, maxsize=0):
- self._threadpool_pool = ThreadPool(size=size, maxsize=maxsize)
-
-
-def threadpool(pool):
- def dec(func):
- def fn(*args, **kwargs):
- pool.submit(func, *args, **kwargs)
- return fn
- return dec
-
-def threadpoolmethod(meth):
- def fn(*args, **kwargs):
- try:
- pool = args[0]._threadpool_pool
- except AttributeError:
- pool = args[0].__dict__.setdefault('_threadpool_pool', ThreadPool())
- # FIXME: how do we check parent class?
-# assert args[0].__class__ == ThreadPoolClass, "Thread pool method must be in a ThreadPoolClass"
- pool.submit(meth, *args, **kwargs)
- return fn
-
-def synchronized(lock=None):
- _log.debug('synchronized decorator factory called')
- if lock==None:
- lock = threading.RLock()
- def dec(func):
- _log.debug('synchronized decorator called')
- def fn(*args, **kwargs):
- _log.debug('getting sync lock')
- lock.acquire()
- _log.debug('got sync lock')
- ex = None
- try:
- r = func(*args, **kwargs)
- except Exception, e:
- ex = e
- _log.debug('releasing sync lock')
- lock.release()
- _log.debug('released sync lock')
- if ex != None:
- raise e
- return r
- return fn
- return dec
-
-def synchronizedmethod(func):
- def fn(*args, **kwargs):
- try:
- lock = args[0]._synchronized_lock
- except AttributeError:
- lock = args[0].__dict__.setdefault('_synchronized_lock', threading.RLock())
- lock.acquire()
- ex = None
- try:
- func(*args, **kwargs)
- except Exception, e:
- ex = e
- lock.release()
- if ex != None:
- raise e
- return fn
-
-
-##############################
-# Test Code
-##############################
-import unittest
-import sys
-import time
-
-class TestThreadPool(unittest.TestCase):
- def setUp(self):
- self.errmargin = 0.5
-
- def testUnthreaded(self):
- queue = Queue.Queue()
- def slowfunc(sleep=1):
- time.sleep(sleep)
- queue.put(None)
- tt = time.time()
- for i in range(4):
- slowfunc()
- for i in range(4):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 4, 1)
-
- def testThreaded(self):
- queue = Queue.Queue()
- @threaded
- def slowthreadfunc(sleep=1):
- time.sleep(sleep)
- queue.put(None)
- tt = time.time()
- for i in range(8):
- slowthreadfunc()
- for i in range(8):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 1, 1)
-
- def testThreadPool(self):
- pool = ThreadPool(size=4)
- queue = Queue.Queue()
- @threadpool(pool)
- def slowpoolfunc(sleep=1):
- time.sleep(sleep)
- queue.put(None)
- tt = time.time()
- for i in range(8):
- slowpoolfunc()
- for i in range(8):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 2, 1)
-
- def testUnthreadedMethod(self):
- queue = Queue.Queue()
- class slowclass:
- def __init__(self, sleep=1):
- self.sleep=sleep
- def beslow(self):
- time.sleep(self.sleep)
- queue.put(None)
- sc = slowclass()
- tt = time.time()
- for i in range(4):
- sc.beslow()
- for i in range(4):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 4, 1)
-
- def testThreadedMethod(self):
- queue = Queue.Queue()
- class slowclass:
- def __init__(self, sleep=1):
- self.sleep=sleep
- @threaded
- def beslow(self):
- time.sleep(self.sleep)
- queue.put(None)
- sc = slowclass()
- tt = time.time()
- for i in range(4):
- sc.beslow()
- for i in range(4):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 1, 1)
-
- def testThreadPoolMethod(self):
- queue = Queue.Queue()
- class slowclass:
- def __init__(self, sleep=1):
- self.sleep=sleep
- @threadpoolmethod
- def beslow(self):
- time.sleep(self.sleep)
- queue.put(None)
- sc = slowclass()
- tt = time.time()
- for i in range(16):
- sc.beslow()
- for i in range(16):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 2, 1)
-
- def testSynchronized(self):
- queue = Queue.Queue()
- @synchronized()
- def addtoqueue():
- time.sleep(1)
- queue.put(None)
- @threaded
- def slowthreadfunc():
- addtoqueue()
- tt = time.time()
- for i in range(4):
- slowthreadfunc()
- for i in range(4):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 4, 1)
-
- def testSynchronizedMethod(self):
- queue = Queue.Queue()
- class addtoqueue:
- @synchronizedmethod
- def addtoqueue1(self):
- time.sleep(1)
- queue.put(None)
- @synchronizedmethod
- def addtoqueue2(self):
- time.sleep(1)
- queue.put(None)
- atc = addtoqueue()
- @threaded
- def slowthreadfunc1():
- atc.addtoqueue1()
- @threaded
- def slowthreadfunc2():
- atc.addtoqueue2()
- tt = time.time()
- for i in range(4):
- slowthreadfunc1()
- slowthreadfunc2()
- for i in range(8):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 8, 1)
-
- def testUnsynchronizedMethod(self):
- queue = Queue.Queue()
- class addtoqueue:
- def addtoqueue1(self):
- time.sleep(1)
- queue.put(None)
- def addtoqueue2(self):
- time.sleep(1)
- queue.put(None)
- atc = addtoqueue()
- @threaded
- def slowthreadfunc1():
- atc.addtoqueue1()
- @threaded
- def slowthreadfunc2():
- atc.addtoqueue2()
- tt = time.time()
- for i in range(4):
- slowthreadfunc1()
- slowthreadfunc2()
- for i in range(8):
- queue.get()
- tt = time.time() - tt
- self.assertAlmostEqual(tt, 1, 1)
-
-
-
-if __name__=='__main__':
- import sys
-
- logging.basicConfig(level=logging.INFO,
- format="%(asctime)s %(levelname)s:\t %(message)s",
- stream=sys.stdout)
-
- suite = unittest.TestLoader().loadTestsFromTestCase(TestThreadPool)
- unittest.TextTestRunner(verbosity=2).run(suite)
diff --git a/src/tashi/messaging/thriftmessaging.py b/src/tashi/messaging/thriftmessaging.py
deleted file mode 100755
index 0c73ff0..0000000
--- a/src/tashi/messaging/thriftmessaging.py
+++ /dev/null
@@ -1,278 +0,0 @@
-#!/usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import sys
-import time
-import socket
-import traceback
-import threading
-
-sys.path.append('./gen-py')
-import tashi.messaging.messagingthrift
-import tashi.messaging.messagingthrift.MessageBrokerThrift
-import tashi.messaging.messagingthrift.SubscriberThrift
-from tashi.messaging.messagingthrift.ttypes import *
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-from thrift.server import TServer
-
-from tashi import ConnectionManager
-
-from tashi.messaging.messaging import *
-from tashi.messaging.threadpool import ThreadPoolClass, threadpool, ThreadPool, threadpoolmethod, threaded
-
-class MessageBrokerThrift(MessageBroker):
- def __init__(self, port, daemon=True):
- MessageBroker.__init__(self)
- self.processor = tashi.messaging.messagingthrift.MessageBrokerThrift.Processor(self)
- self.transport = TSocket.TServerSocket(port)
- self.tfactory = TTransport.TBufferedTransportFactory()
- self.pfactory = TBinaryProtocol.TBinaryProtocolFactory()
- self.proxy = ConnectionManager(tashi.messaging.messagingthrift.SubscriberThrift.Client, 0)
- self.ready = threading.Event()
-# self.server = TServer.TSimpleServer(self.processor,
-# self.transport,
-# self.tfactory,
-# self.pfactory)
-# self.server = TServer.TThreadPoolServer(self.processor,
-# self.transport,
-# self.tfactory,
-# self.pfactory)
- self.server = TServer.TThreadedServer(self.processor,
- self.transport,
- self.tfactory,
- self.pfactory)
- self.publishCalls = 0
-
- def ssvrthrd():
- try:
- # FIXME: Race condition, the ready event should be set after
- # starting the server. However, server.serve()
- # doesn't return under normal circumstances. This
- # seems to work in practice, even though it's clearly
- # wrong.
- self.ready.set()
- self.server.serve()
- except Exception, e:
- print e
- sys.stdout.flush()
- pass
- svt = threading.Thread(target=ssvrthrd)
- svt.setDaemon(daemon)
- svt.start()
- self.ready.wait()
- def log(self, message):
- MessageBroker.log(self, message)
- @synchronizedmethod
- def addSubscriber(self, host, port):
- subscribers = self.getSubscribers()
- for sub in subscribers:
- if sub.host == host and sub.port == port:
- return
- subscriber = SubscriberThriftProxy(host, port, self.proxy)
- MessageBroker.addSubscriber(self, subscriber)
- def removeSubscriber(self, host, port):
- subscriber = None
- subscribers = self.getSubscribers()
- for sub in subscribers:
- if sub.host == host and sub.port == port:
- subscriber = sub
- if subscriber != None:
- MessageBroker.removeSubscriber(self, subscriber)
- @synchronizedmethod
- def publish(self, message):
- self.publishCalls = self.publishCalls + 1
- sys.stdout.flush()
- MessageBroker.publish(self, message)
-
-class MessageBrokerThriftProxy:
- def __init__(self, host, port):
- self.host = host
- self.port = port
- self.proxy = ConnectionManager(tashi.messaging.messagingthrift.MessageBrokerThrift.Client,port)
- @synchronizedmethod
- def log(self, message):
- self.proxy[self.host, self.port].log(message)
- @synchronizedmethod
- def publish(self, message):
- self.proxy[self.host, self.port].publish(message)
- @synchronizedmethod
- def publishList(self, messages):
- self.proxy[self.host, self.port].publishList(messages)
- @synchronizedmethod
- def addSubscriber(self, subscriber):
- self.proxy[self.host, self.port].addSubscriber(host=subscriber.host, port=subscriber.port)
- @synchronizedmethod
- def removeSubscriber(self, subscriber):
- self.proxy[self.host, self.port].removeSubscriber(host=subscriber.host, port=subscriber.port)
-
-
-
-class SubscriberThrift(Subscriber, threading.Thread):
- def __init__(self, broker, port, synchronized=False):
- self.host = socket.gethostname()
- self.port = port
- self.processor = tashi.messaging.messagingthrift.SubscriberThrift.Processor(self)
- self.transport = TSocket.TServerSocket(port)
- self.tfactory = TTransport.TBufferedTransportFactory()
- self.pfactory = TBinaryProtocol.TBinaryProtocolFactory()
- self.server = TServer.TThreadedServer(self.processor,
- self.transport,
- self.tfactory,
- self.pfactory)
- def ssvrthrd():
- try:
- self.server.serve()
- except Exception, e:
- pass
-
-
- self.thread = threading.Thread(target=ssvrthrd)
- self.thread.setDaemon(True)
- self.thread.start()
-
- # We have to call this AFTER initializing our server, so that
- # the broker can contact us
- # Wrap this in a try/catch because the broker may not be online yet
- try:
- Subscriber.__init__(self, broker, synchronized=synchronized)
- except:
- pass
- threading.Thread.__init__(self)
- self.setDaemon(True)
- self.start()
-
- def stop(self):
-# # FIXME: this is broken, there is no clear way to stop a
-# # Thrift server
- self.broker.removeSubscriber(self)
- self.transport.close()
- def run(self):
- while(True):
- # renew subscription every 5 min
- try:
- self.broker.addSubscriber(self)
- except:
- pass
- time.sleep(5*60)
-
-class SubscriberThriftProxy:
- def __init__(self, host, port, proxy, aggregate = 100):
- self.host = host
- self.port = port
- self.proxy = proxy
- # for some reason, thrift clients are not thread-safe, lock during send
- self.lock = threading.Lock()
- self.pending = []
- self.aggregateSize = aggregate
- def publish(self, message):
- self.lock.acquire()
- sys.stdout.flush()
- if message.has_key('aggregate') and message['aggregate'] == 'True':
- self.pending.append(message)
- if len(self.pending) >= self.aggregateSize:
- try:
- self.proxy[self.host, self.port].publishList(self.pending)
- except Exception, e:
- print e
- self.lock.release()
- raise e
- self.pending = []
- else:
- try:
- self.proxy[self.host, self.port].publish(message)
- except Exception, e:
- sys.stdout.flush()
- print e
- self.lock.release()
- raise e
- self.lock.release()
-
-class PublisherThrift(Publisher):
- def __init__(self, host, port):
- self.host = host
- self.port = port
- self.broker = MessageBrokerThriftProxy(host, port)
- Publisher.__init__(self, self.broker)
-
-####################
-# Testing Code
-####################
-
-class TestSubscriberThrift(SubscriberThrift):
- def __init__(self, *args, **kwargs):
- self.queue = Queue.Queue()
- SubscriberThrift.__init__(self, *args, **kwargs)
- def handle(self, message):
- self.queue.put(message)
-
-portnum = 1718
-class TestThriftMessaging(unittest.TestCase):
- def setUp(self):
- global portnum
- self.broker = MessageBrokerThrift(portnum)
- self.brokerPort = portnum
- portnum = portnum + 1
- self.proxy = MessageBrokerThriftProxy('localhost', self.brokerPort)
- self.publisher = PublisherThrift('localhost', self.brokerPort)
- self.subscriber = TestSubscriberThrift(self.proxy, portnum)
- portnum = portnum + 1
- def tearDown(self):
- pass
- def testSetUp(self):
- pass
- def testPublish(self):
- self.publisher.publish( {'message':'hello world'} )
- self.subscriber.queue.get(True, timeout=5)
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- def testPublishList(self):
- nrmsgs = 10
- msgs = []
- for i in range(nrmsgs):
- msgs.append( {'msgnum':str(i)} )
- self.publisher.publishList( msgs )
- for i in range(nrmsgs):
- self.subscriber.queue.get(True, timeout=5)
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- def testAggregate(self):
- nrmsgs = self.publisher.aggregateSize
- for i in range(nrmsgs):
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- self.publisher.aggregate( {'msgnum':str(i)} )
- for i in range(nrmsgs):
- self.subscriber.queue.get(True, timeout=5)
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- def testAggregateKeyword(self):
- nrmsgs = self.publisher.aggregateSize
- for i in range(nrmsgs):
- self.assertEqual(self.subscriber.queue.qsize(), 0)
- self.publisher.publish( {'msgnum':str(i), 'aggregate':'True'} )
- for i in range(nrmsgs):
- self.subscriber.queue.get(True, timeout=5)
- self.assertEqual(self.subscriber.queue.qsize(), 0)
-
-
-if __name__=='__main__':
- suite = unittest.TestLoader().loadTestsFromTestCase(TestThriftMessaging)
- unittest.TextTestRunner(verbosity=2).run(suite)
-
-
diff --git a/src/tashi/nodemanager/nodemanager.py b/src/tashi/nodemanager/nodemanager.py
index 66d2d5b..b725b0a 100755
--- a/src/tashi/nodemanager/nodemanager.py
+++ b/src/tashi/nodemanager/nodemanager.py
@@ -18,30 +18,57 @@
# under the License.
import logging.config
-import signal
import sys
+import os
-from tashi.util import instantiateImplementation, getConfig, debugConsole, signalHandler
+from tashi.util import instantiateImplementation, debugConsole
import tashi
from tashi import boolean
from tashi.rpycservices import rpycservices
+from tashi.utils.config import Config
+
from rpyc.utils.server import ThreadedServer
from rpyc.utils.authenticators import TlsliteVdbAuthenticator
-@signalHandler(signal.SIGTERM)
-def handleSIGTERM(signalNumber, stackFrame):
- sys.exit(0)
-
def main():
- global config, dfs, vmm, service, server, log, notifier
+ global config, log
- (config, configFiles) = getConfig(["NodeManager"])
- publisher = instantiateImplementation(config.get("NodeManager", "publisher"), config)
- tashi.publisher = publisher
+ config = Config(["NodeManager"])
+ configFiles = config.getFiles()
+
logging.config.fileConfig(configFiles)
log = logging.getLogger(__name__)
log.info('Using configuration file(s) %s' % configFiles)
+
+ # handle keyboard interrupts (http://code.activestate.com/recipes/496735-workaround-for-missed-sigint-in-multithreaded-prog/)
+ child = os.fork()
+
+ if child == 0:
+ startNodeManager()
+ # shouldn't exit by itself
+ sys.exit(0)
+
+ else:
+ # main
+ try:
+ os.waitpid(child, 0)
+ except KeyboardInterrupt:
+ log.info("Exiting node manager after receiving a SIGINT signal")
+ os._exit(0)
+ except Exception:
+ log.exception("Abnormal termination of node manager")
+ os._exit(-1)
+
+ log.info("Exiting node manager after service thread exited")
+ os._exit(-1)
+
+ return
+
+def startNodeManager():
+ global config, dfs, vmm, service, server, log, notifier
+ publisher = instantiateImplementation(config.get("NodeManager", "publisher"), config)
+ tashi.publisher = publisher
dfs = instantiateImplementation(config.get("NodeManager", "dfs"), config)
vmm = instantiateImplementation(config.get("NodeManager", "vmm"), config, dfs, None)
service = instantiateImplementation(config.get("NodeManager", "service"), config, vmm)
@@ -51,6 +78,9 @@
users = {}
users[config.get('AllowedUsers', 'clusterManagerUser')] = config.get('AllowedUsers', 'clusterManagerPassword')
authenticator = TlsliteVdbAuthenticator.from_dict(users)
+
+ # XXXstroucki: ThreadedServer is liable to have exceptions
+ # occur within if an endpoint is lost.
t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('NodeManagerService', 'port')), auto_register=False, authenticator=authenticator)
else:
t = ThreadedServer(service=rpycservices.ManagerService, hostname='0.0.0.0', port=int(config.get('NodeManagerService', 'port')), auto_register=False)
@@ -59,14 +89,11 @@
t.service._type = 'NodeManagerService'
debugConsole(globals())
-
- try:
- t.start()
- except KeyboardInterrupt:
- handleSIGTERM(signal.SIGTERM, None)
- except Exception, e:
- sys.stderr.write(str(e) + "\n")
- sys.exit(-1)
+
+ t.start()
+ # shouldn't exit by itself
+ sys.exit(0)
+
if __name__ == "__main__":
main()
diff --git a/src/tashi/nodemanager/nodemanagerservice.py b/src/tashi/nodemanager/nodemanagerservice.py
index c493ac9..1955ecf 100755
--- a/src/tashi/nodemanager/nodemanagerservice.py
+++ b/src/tashi/nodemanager/nodemanagerservice.py
@@ -5,63 +5,57 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
-# under the License.
+# under the License.
import logging
import socket
import threading
import time
-from tashi.rpycservices import rpycservices
from tashi.rpycservices.rpyctypes import InstanceState, TashiException, Errors, Instance
from tashi import boolean, vmStates, ConnectionManager
-import tashi
-
class NodeManagerService(object):
"""RPC handler for the NodeManager
-
- Perhaps in the future I can hide the dfs from the
+
+ Perhaps in the future I can hide the dfs from the
VmControlInterface and do all dfs operations here?"""
-
+
def __init__(self, config, vmm):
+ # XXXstroucki: vmm will wait for this constructor to complete
self.config = config
self.vmm = vmm
- self.cmHost = config.get("NodeManagerService", "clusterManagerHost")
- self.cmPort = int(config.get("NodeManagerService", "clusterManagerPort"))
- self.authAndEncrypt = boolean(config.get('Security', 'authAndEncrypt'))
+ self.cmHost = self.config.get("NodeManagerService", "clusterManagerHost")
+ self.cmPort = int(self.config.get("NodeManagerService", "clusterManagerPort"))
+ self.authAndEncrypt = boolean(self.config.get('Security', 'authAndEncrypt'))
if self.authAndEncrypt:
- self.username = config.get('AccessClusterManager', 'username')
- self.password = config.get('AccessClusterManager', 'password')
+ self.username = self.config.get('AccessClusterManager', 'username')
+ self.password = self.config.get('AccessClusterManager', 'password')
else:
self.username = None
self.password = None
self.log = logging.getLogger(__file__)
- self.convertExceptions = boolean(config.get('NodeManagerService', 'convertExceptions'))
- self.registerFrequency = float(config.get('NodeManagerService', 'registerFrequency'))
- self.statsInterval = float(self.config.get('NodeManagerService', 'statsInterval'))
- self.registerHost = boolean(config.get('NodeManagerService', 'registerHost'))
+ self.convertExceptions = boolean(self.config.get('NodeManagerService', 'convertExceptions'))
+ self.registerFrequency = float(self.config.get('NodeManagerService', 'registerFrequency'))
+ self.statsInterval = float(self.config.get('NodeManagerService', 'statsInterval', default = 0))
+ self.registerHost = boolean(self.config.get('NodeManagerService', 'registerHost'))
try:
self.cm = ConnectionManager(self.username, self.password, self.cmPort)[self.cmHost]
except:
self.log.exception("Could not connect to CM")
+ # XXXstroucki: raise?
return
- self.accountingHost = None
- self.accountingPort = None
- try:
- self.accountingHost = self.config.get('NodeManagerService', 'accountingHost')
- self.accountingPort = self.config.getint('NodeManagerService', 'accountingPort')
- except:
- pass
+ self.accountingHost = self.config.get('NodeManagerService', 'accountingHost')
+ self.accountingPort = self.config.getint('NodeManagerService', 'accountingPort')
self.notifyCM = []
@@ -76,25 +70,25 @@
self.__registerHost()
+ # XXXstroucki: should make an effort to retry
+ # This can time out now with an exception
self.id = self.cm.registerNodeManager(self.host, self.instances.values())
- # XXXstroucki cut cross check for NM/VMM state
-
# start service threads
- threading.Thread(target=self.__registerWithClusterManager).start()
- threading.Thread(target=self.__statsThread).start()
-
+ threading.Thread(name="registerWithClusterManager", target=self.__registerWithClusterManager).start()
+ threading.Thread(name="statsThread", target=self.__statsThread).start()
+
def __initAccounting(self):
- self.accountBuffer = []
- self.accountLines = 0
- self.accountingClient = None
- try:
- if (self.accountingHost is not None) and \
- (self.accountingPort is not None):
- self.accountingClient=rpycservices.client(self.accountingHost, self.accountingPort)
- except:
- self.log.exception("Could not init accounting")
-
+ self.accountBuffer = []
+ self.accountLines = 0
+ self.accountingClient = None
+ try:
+ if (self.accountingHost is not None) and \
+ (self.accountingPort is not None):
+ self.accountingClient = ConnectionManager(self.username, self.password, self.accountingPort)[self.accountingHost]
+ except:
+ self.log.exception("Could not init accounting")
+
def __loadVmInfo(self):
try:
self.instances = self.vmm.getInstances()
@@ -105,15 +99,19 @@
# send data to CM
# XXXstroucki adapt this for accounting?
def __flushNotifyCM(self):
- start = time.time()
# send data to CM, adding message to buffer if
# it fails
try:
notifyCM = []
try:
while (len(self.notifyCM) > 0):
+ # XXXstroucki ValueError: need more than 1 value to unpack
+ # observed here. How?
value = self.notifyCM.pop(0)
- (instanceId, newInst, old, success) = value
+ try:
+ (instanceId, newInst, old, success) = value
+ except:
+ self.log.exception("problem with value: %s" % value)
try:
self.cm.vmUpdate(instanceId, newInst, old)
except TashiException, e:
@@ -135,7 +133,7 @@
#if (toSleep > 0):
#time.sleep(toSleep)
- def __ACCOUNTFLUSH(self):
+ def __ACCOUNTFLUSH(self):
try:
if (self.accountingClient is not None):
self.accountingClient.record(self.accountBuffer)
@@ -145,45 +143,51 @@
self.log.exception("Failed to flush accounting data")
- def __ACCOUNT(self, text, instance=None, host=None):
- now = time.time()
- instanceText = None
- hostText = None
+ def __ACCOUNT(self, text, instance=None, host=None):
+ now = time.time()
+ instanceText = None
+ hostText = None
- if instance is not None:
+ if instance is not None:
try:
- instanceText = 'Instance(%s)' % (instance)
+ instanceText = 'Instance(%s)' % (instance)
except:
self.log.exception("Invalid instance data")
- if host is not None:
+ if host is not None:
try:
- hostText = "Host(%s)" % (host)
+ hostText = "Host(%s)" % (host)
except:
self.log.exception("Invalid host data")
- secondary = ','.join(filter(None, (hostText, instanceText)))
+ secondary = ','.join(filter(None, (hostText, instanceText)))
- line = "%s|%s|%s" % (now, text, secondary)
+ line = "%s|%s|%s" % (now, text, secondary)
- self.accountBuffer.append(line)
- self.accountLines += 1
+ self.accountBuffer.append(line)
+ self.accountLines += 1
# XXXstroucki think about force flush every so often
- if (self.accountLines > 0):
- self.__ACCOUNTFLUSH()
+ if (self.accountLines > 0):
+ self.__ACCOUNTFLUSH()
# service thread function
def __registerWithClusterManager(self):
+ happy = False
while True:
#self.__ACCOUNT("TESTING")
start = time.time()
try:
instances = self.instances.values()
self.id = self.cm.registerNodeManager(self.host, instances)
+ if not happy:
+ happy = True
+ self.log.info("Registered with the CM")
+
except Exception:
self.log.exception('Failed to register with the CM')
+ happy = False
toSleep = start - time.time() + self.registerFrequency
if (toSleep > 0):
@@ -201,26 +205,29 @@
instance = self.instances.get(vmId, None)
if (not instance):
continue
- id = instance.id
+ _id = instance.id
stats = self.vmm.getStats(vmId)
for stat in stats:
- publishList.append({"vm_%d_%s" % (id, stat):stats[stat]})
+ publishList.append({"vm_%d_%s" % (_id, stat):stats[stat]})
except:
self.log.exception('statsThread threw an exception')
if (len(publishList) > 0):
- tashi.publisher.publishList(publishList)
+ # XXXstroucki: no publisher currently
+ pass
+ #tashi.publisher.publishList(publishList)
except:
self.log.exception('statsThread threw an exception')
time.sleep(self.statsInterval)
- def __registerHost(self):
- hostname = socket.gethostname()
+ def __registerHost(self):
+ hostname = socket.gethostname()
# populate some defaults
- # XXXstroucki: I think it's better if the nodemanager fills these in properly when registering with the clustermanager
+ # XXXstroucki: I think it's better if the nodemanager fills these in
+ # properly when registering with the clustermanager
memory = 0
cores = 0
version = "empty"
- #self.cm.registerHost(hostname, memory, cores, version)
+ #self.cm.registerHost(hostname, memory, cores, version)
def __getInstance(self, vmId):
instance = self.instances.get(vmId, None)
@@ -235,15 +242,23 @@
raise TashiException(d={'errno':Errors.NoSuchVmId,'msg':"There is no vmId %d on this host" % (vmId)})
-
+
# remote
# Called from VMM to update self.instances
# but only changes are Exited, MigrateTrans and Running
# qemu.py calls this in the matchSystemPids thread
# xenpv.py: i have no real idea why it is called there
def vmStateChange(self, vmId, old, cur):
- instance = self.__getInstance(vmId)
+ try:
+ instance = self.__getInstance(vmId)
+ except TashiException, e:
+ if e.errno == Errors.NoSuchVmId:
+ self.log.warning("Asked to change state for unknown VM. Has it not completed starting yet?")
+ return False
+ else:
+ raise
+ before = instance.state
if (instance.state == cur):
# Don't do anything if state is what it should be
return True
@@ -252,16 +267,25 @@
# make a note of mismatch, but go on.
# the VMM should know best
self.log.warning('VM state was %s, call indicated %s' % (vmStates[instance.state], vmStates[old]))
-
+
instance.state = cur
self.__ACCOUNT("NM VM STATE CHANGE", instance=instance)
-
+
newInst = Instance(d={'state':cur})
success = lambda: None
- # send the state change up to the CM
- self.notifyCM.append((instance.id, newInst, old, success))
- self.__flushNotifyCM()
+
+ # if this instance was in MigrateTrans, and has exited
+ # then don't tell the CM; it is the source instance
+ # exiting, and the CM should have updated its information
+ # to the target instance's info.
+ # Otherwise, send the state change up to the CM
+
+ if before == InstanceState.MigrateTrans and cur == InstanceState.Exited:
+ pass
+ else:
+ self.notifyCM.append((instance.id, newInst, old, success))
+ self.__flushNotifyCM()
# cache change locally
self.instances[vmId] = instance
@@ -270,7 +294,6 @@
# At this point, the VMM will clean up,
# so forget about this instance
del self.instances[vmId]
- return True
return True
@@ -278,10 +301,12 @@
def createInstance(self, instance):
vmId = instance.vmId
self.instances[vmId] = instance
-
-
+
+
# remote
def instantiateVm(self, instance):
+ # XXXstroucki: check my capacity before instantiating
+
self.__ACCOUNT("NM VM INSTANTIATE", instance=instance)
try:
vmId = self.vmm.instantiateVm(instance)
@@ -291,7 +316,7 @@
return vmId
except:
self.log.exception("Failed to start instance")
-
+
# remote
def suspendVm(self, vmId, destination):
instance = self.__getInstance(vmId)
@@ -300,10 +325,12 @@
instance.state = InstanceState.Suspending
self.instances[vmId] = instance
threading.Thread(target=self.vmm.suspendVm, args=(vmId, destination)).start()
-
+
# called by resumeVm as thread
def __resumeVmHelper(self, instance, name):
self.vmm.resumeVmHelper(instance, name)
+ # XXXstroucki should the VMM be responsible for setting
+ # state? It should know better.
instance.state = InstanceState.Running
newInstance = Instance(d={'id':instance.id,'state':instance.state})
success = lambda: None
@@ -323,7 +350,7 @@
self.log.exception('resumeVm failed')
raise TashiException(d={'errno':Errors.UnableToResume,'msg':"resumeVm failed on the node manager"})
return instance.vmId
-
+
# remote
def prepReceiveVm(self, instance, source):
self.__ACCOUNT("NM VM MIGRATE RECEIVE PREP")
@@ -342,7 +369,9 @@
# XXXstroucki migrate out?
def __migrateVmHelper(self, instance, target, transportCookie):
self.vmm.migrateVm(instance.vmId, target.name, transportCookie)
- del self.instances[instance.vmId]
+ # removal from self.instances done by communication from
+ # VMM as part of above migrateVm function
+ return
# remote
# XXXstroucki migrate out?
@@ -351,9 +380,9 @@
self.__ACCOUNT("NM VM MIGRATE", instance=instance)
instance.state = InstanceState.MigrateTrans
self.instances[vmId] = instance
- threading.Thread(target=self.__migrateVmHelper, args=(instance, target, transportCookie)).start()
+ threading.Thread(name="migrateVmHelper", target=self.__migrateVmHelper, args=(instance, target, transportCookie)).start()
return
-
+
# called by receiveVm as thread
# XXXstroucki migrate in?
def __receiveVmHelper(self, instance, transportCookie):
@@ -364,15 +393,16 @@
self.instances[vmId] = instance
newInstance = Instance(d={'id':instance.id,'state':instance.state,'vmId':instance.vmId,'hostId':instance.hostId})
success = lambda: None
- self.notifyCM.append((newInstance.id, newInstance, InstanceState.Running, success))
+ self.notifyCM.append((newInstance.id, newInstance, InstanceState.MigrateTrans, success))
self.__flushNotifyCM()
# remote
# XXXstroucki migrate in?
def receiveVm(self, instance, transportCookie):
instance.state = InstanceState.MigrateTrans
- vmId = instance.vmId
- self.instances[vmId] = instance
+ # XXXstroucki new vmId is not known yet until VM is received
+ #vmId = instance.vmId
+ #self.instances[vmId] = instance
self.__ACCOUNT("NM VM MIGRATE RECEIVE", instance=instance)
threading.Thread(target=self.__receiveVmHelper, args=(instance, transportCookie)).start()
return
@@ -429,4 +459,3 @@
# remote
def liveCheck(self):
return "alive"
-
diff --git a/src/tashi/nodemanager/vmcontrol/qemu.py b/src/tashi/nodemanager/vmcontrol/qemu.py
index 7806f4b..d00d07d 100644
--- a/src/tashi/nodemanager/vmcontrol/qemu.py
+++ b/src/tashi/nodemanager/vmcontrol/qemu.py
@@ -50,12 +50,12 @@
output = child.monitorFd
#print "listen"
select.select([ls], [], [])
- (s, clientAddr) = listenSocket.accept()
+ (s, __clientAddr) = listenSocket.accept()
while s:
if (output != -1):
- (rl, wl, el) = select.select([s, output], [], [])
+ (rl, __wl, __el) = select.select([s, output], [], [])
else:
- (rl, wl, el) = select.select([s], [], [])
+ (rl, __wl, __el) = select.select([s], [], [])
if (len(rl) > 0):
if (rl[0] == s):
#print "from s"
@@ -87,17 +87,19 @@
def __init__(self, config, dfs, nm):
VmControlInterface.__init__(self, config, dfs, nm)
- self.QEMU_BIN = self.config.get("Qemu", "qemuBin")
- self.INFO_DIR = self.config.get("Qemu", "infoDir")
- self.POLL_DELAY = float(self.config.get("Qemu", "pollDelay"))
- self.migrationRetries = int(self.config.get("Qemu", "migrationRetries"))
- self.monitorTimeout = float(self.config.get("Qemu", "monitorTimeout"))
- self.migrateTimeout = float(self.config.get("Qemu", "migrateTimeout"))
- self.useMigrateArgument = boolean(self.config.get("Qemu", "useMigrateArgument"))
- self.statsInterval = float(self.config.get("Qemu", "statsInterval"))
- # XXXstroucki amount of reserved memory could be configurable
- self.reservedMem = 512
- # XXXstroucki perhaps make this configurable
+ self.QEMU_BIN = self.config.get("Qemu", "qemuBin", default = "/usr/bin/kvm")
+ self.INFO_DIR = self.config.get("Qemu", "infoDir", default = "/var/tmp/VmControlQemu/")
+ self.POLL_DELAY = float(self.config.get("Qemu", "pollDelay", default = 1))
+ self.migrationRetries = int(self.config.get("Qemu", "migrationRetries", default = 10))
+ self.monitorTimeout = float(self.config.get("Qemu", "monitorTimeout", default = 60))
+ self.migrateTimeout = float(self.config.get("Qemu", "migrateTimeout", default = 300))
+ self.useMigrateArgument = boolean(self.config.get("Qemu", "useMigrateArgument", default = False))
+ self.statsInterval = float(self.config.get("Qemu", "statsInterval", default = 0))
+ reservedMem = self.config.get("Qemu", "reservedMem", default = 512)
+ reservedMem = int(reservedMem)
+
+ self.reservedMem = reservedMem
+
self.ifPrefix = "tashi"
self.controlledVMs = {}
self.usedPorts = []
@@ -106,13 +108,20 @@
self.vncPortLock = threading.Lock()
self.consolePort = 10000
self.consolePortLock = threading.Lock()
- self.migrationSemaphore = threading.Semaphore(int(self.config.get("Qemu", "maxParallelMigrations")))
+ maxParallelMigrations = self.config.get("Qemu", "maxParallelMigrations")
+ maxParallelMigrations = int(maxParallelMigrations)
+ if maxParallelMigrations < 1:
+ maxParallelMigrations = 1
+
+ self.migrationSemaphore = threading.Semaphore(maxParallelMigrations)
self.stats = {}
+
+ self.suspendHandler = self.config.get("Qemu", "suspendHandler", default = "gzip")
+ self.resumeHandler = self.config.get("Qemu", "resumeHandler", default = "zcat")
+
self.scratchVg = self.config.get("Qemu", "scratchVg")
- # XXXstroucki revise
- self.scratchDir = self.config.get("Qemu", "scratchDir")
- if len(self.scratchDir) == 0:
- self.scratchDir = "/tmp"
+
+ self.scratchDir = self.config.get("Qemu", "scratchDir", default = "/tmp")
try:
os.mkdir(self.INFO_DIR)
@@ -129,13 +138,20 @@
def __init__(self, **attrs):
self.__dict__.update(attrs)
+ def __dereferenceLink(self, spec):
+ newspec = os.path.realpath(spec)
+ return newspec
+
+
def __getHostPids(self):
"""Utility function to get a list of system PIDs that match the QEMU_BIN specified (/proc/nnn/exe)"""
pids = []
+ real_bin = self.__dereferenceLink(self.QEMU_BIN)
+
for f in os.listdir("/proc"):
try:
- bin = os.readlink("/proc/%s/exe" % (f))
- if (bin.find(self.QEMU_BIN) != -1):
+ binary = os.readlink("/proc/%s/exe" % (f))
+ if (binary.find(real_bin) != -1):
pids.append(int(f))
except Exception:
pass
@@ -146,14 +162,14 @@
"""Will return a dict of instances by vmId to the caller"""
return dict((x, self.controlledVMs[x].instance) for x in self.controlledVMs.keys())
- def __matchHostPids(self, controlledVMs):
+ def __matchHostPids(self):
"""This is run in a separate polling thread and it must do things that are thread safe"""
- vmIds = controlledVMs.keys()
+ vmIds = self.controlledVMs.keys()
pids = self.__getHostPids()
for vmId in vmIds:
- child = controlledVMs[vmId]
+ child = self.controlledVMs[vmId]
instance = child.instance
name = instance.name
@@ -164,9 +180,9 @@
# remove info file
os.unlink(self.INFO_DIR + "/%d"%(vmId))
- # XXXstroucki why not use self.controlledVMs
- # argument, so why modify this fn's formal?
- del controlledVMs[vmId]
+ # XXXstroucki python should handle
+ # locking here (?)
+ del self.controlledVMs[vmId]
# remove any stats (appropriate?)
try:
@@ -187,7 +203,7 @@
try:
os.waitpid(vmId, 0)
except:
- log.exception("waitpid failed for vmId" % (vmId))
+ log.exception("waitpid failed for vmId %s" % (vmId))
# recover the child's stderr and monitor
# output if possible
if (child.errorBit):
@@ -204,17 +220,21 @@
# remove scratch storage
try:
if self.scratchVg is not None:
+ scratchName = "lv%s" % name
log.info("Removing any scratch for %s" % (name))
- cmd = "/sbin/lvremove --quiet -f %s" % self.scratchVg
- result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), close_fds=True).wait()
+ cmd = "/sbin/lvremove --quiet -f %s/%s" % (self.scratchVg, scratchName)
+ __result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), close_fds=True).wait()
except:
log.warning("Problem cleaning scratch volumes")
pass
# let the NM know
try:
- if (not child.migratingOut):
- self.nm.vmStateChange(vmId, None, InstanceState.Exited)
+ # XXXstroucki: we don't want to treat
+ # the source VM of a migration exiting
+ # as an actual
+ # exit, but the NM should probably know.
+ self.nm.vmStateChange(vmId, None, InstanceState.Exited)
except Exception:
log.exception("vmStateChange failed for VM %s" % (name))
else:
@@ -273,7 +293,7 @@
while True:
try:
time.sleep(self.POLL_DELAY)
- self.__matchHostPids(self.controlledVMs)
+ self.__matchHostPids()
except:
log.exception("Exception in poolVMsLoop")
@@ -294,7 +314,7 @@
monitorFd = child.monitorFd
buf = ""
try:
- (rlist, wlist, xlist) = select.select([monitorFd], [], [], 0.0)
+ (rlist, __wlist, __xlist) = select.select([monitorFd], [], [], 0.0)
while (len(rlist) > 0):
c = os.read(monitorFd, 1)
if (c == ""):
@@ -302,7 +322,7 @@
child.errorBit = True
raise RuntimeError
buf = buf + c
- (rlist, wlist, xlist) = select.select([monitorFd], [], [], 0.0)
+ (rlist, __wlist, __xlist) = select.select([monitorFd], [], [], 0.0)
finally:
child.monitorHistory.append(buf)
return buf
@@ -317,14 +337,14 @@
while (buf[-(len(needle)):] != needle):
#print "[BUF]: %s" % (buf)
#print "[NEE]: %s" % (needle)
- (rlist, wlist, xlist) = select.select([monitorFd], [], [], timeout)
+ (rlist, __wlist, __xlist) = select.select([monitorFd], [], [], timeout)
if (len(rlist) == 0):
- log.error("Timeout getting results from monitor for vmId %d" % (child.pid))
+ log.error("Timeout getting results from monitor on FD %s for vmId %d" % (monitorFd, child.pid))
child.errorBit = True
raise RuntimeError
c = os.read(monitorFd, 1)
if (c == ""):
- log.error("Early termination on monitor for vmId %d" % (child.pid))
+ log.error("Early termination on monitor FD %s for vmId %d" % (monitorFd, child.pid))
child.errorBit = True
raise RuntimeError
buf = buf + c
@@ -426,6 +446,7 @@
disk = instance.disks[index]
uri = scrubString(disk.uri)
imageLocal = self.dfs.getLocalHandle("images/" + uri)
+ imageLocal = self.__dereferenceLink(imageLocal)
thisDiskList = [ "file=%s" % imageLocal ]
thisDiskList.append("if=%s" % diskInterface)
thisDiskList.append("index=%d" % index)
@@ -466,7 +487,7 @@
# XXXstroucki check for capacity
cmd = "/sbin/lvcreate --quiet -n%s -L %dG %s" % (scratchName, scratchSize, self.scratchVg)
# XXXstroucki check result
- result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE).wait()
+ __result = subprocess.Popen(cmd.split(), executable=cmd.split()[0], stdout=subprocess.PIPE).wait()
index += 1
thisDiskList = [ "file=/dev/%s/%s" % (self.scratchVg, scratchName) ]
@@ -500,8 +521,14 @@
nicModel = self.__stripSpace(nicModel)
nicString = ""
+ nicNetworks = {}
for i in range(0, len(instance.nics)):
+ # Don't allow more than one interface per vlan
nic = instance.nics[i]
+ if nicNetworks.has_key(nic.network):
+ continue
+ nicNetworks[nic.network] = True
+
nicString = nicString + "-net nic,macaddr=%s,model=%s,vlan=%d -net tap,ifname=%s%d.%d,vlan=%d,script=/etc/qemu-ifup.%d " % (nic.mac, nicModel, nic.network, self.ifPrefix, instance.id, i, nic.network, nic.network)
# ACPI
@@ -593,10 +620,15 @@
# trying to restart the migration by running
# the command again (when qemu is ready to
# listen again) is probably not helpful
+ # XXXstroucki: failures observed:
+ # "migration failed"
+ # "Block format 'qcow' used by device '' does not support feature 'live migration'
success = False
+ # see if migration can be speeded up
+ res = self.__enterCommand(child, "migrate_set_speed 1g", timeout=self.migrateTimeout)
res = self.__enterCommand(child, "migrate -i %s" % (target), timeout=self.migrateTimeout)
retry = retry - 1
- if (res.find("migration failed") == -1):
+ if (res.find("Block migration completed") != -1):
success = True
retry = 0
break
@@ -613,6 +645,8 @@
# extern
def instantiateVm(self, instance):
+ # XXXstroucki: check capacity before instantiating
+
try:
(vmId, cmd) = self.__startVm(instance, None)
child = self.__getChildFromPid(vmId)
@@ -632,16 +666,23 @@
# extern
def suspendVm(self, vmId, target):
- tmpTarget = "/%s/tashi_qemu_suspend_%d_%d" % (self.scratchDir, os.getpid(), vmId)
# XXX: Use fifo to improve performance
- vmId = self.__stopVm(vmId, "\"exec:gzip -c > %s\"" % (tmpTarget), True)
- self.dfs.copyTo(tmpTarget, target)
- os.unlink(tmpTarget)
+ # XXXstroucki: we could create a fifo on the local fs,
+ # then start a thread to copy it to dfs. But if we're
+ # reading from dfs directly on resume, why not write
+ # directly here?
+
+ #tmpTarget = "/%s/tashi_qemu_suspend_%d_%d" % (self.scratchDir, os.getpid(), vmId)
+ fn = self.dfs.getLocalHandle("%s" % target)
+ vmId = self.__stopVm(vmId, "\"exec:%s > %s\"" % (self.suspendHandler, fn), True)
+ #self.dfs.copyTo(tmpTarget, target)
+ #os.unlink(tmpTarget)
return vmId
# extern
def resumeVmHelper(self, instance, source):
- child = self.__getChildFromPid(instance.vmId)
+ vmId = instance.vmId
+ child = self.__getChildFromPid(vmId)
try:
self.__getPtyInfo(child, True)
except RuntimeError:
@@ -650,21 +691,25 @@
raise
status = "paused"
while ("running" not in status):
- status = self.__enterCommand(child, "info status")
- time.sleep(1)
+ try:
+ status = self.__enterCommand(child, "info status")
+ except RuntimeError:
+ pass
+ time.sleep(60)
+
+ self.nm.vmStateChange(vmId, None, InstanceState.Running)
child.instance.state = InstanceState.Running
self.__saveChildInfo(child)
# extern
def resumeVm(self, instance, source):
fn = self.dfs.getLocalHandle("%s" % (source))
- (vmId, cmd) = self.__startVm(instance, "exec:zcat %s" % (fn))
+ (vmId, cmd) = self.__startVm(instance, "exec:%s < %s" % (self.resumeHandler, fn))
child = self.__getChildFromPid(vmId)
child.cmd = cmd
return vmId
def __checkPortListening(self, port):
- lc = 0
# XXXpipe: find whether something is listening yet on the port
(stdin, stdout) = os.popen2("netstat -ln | grep 0.0.0.0:%d | wc -l" % (port))
stdin.close()
@@ -720,7 +765,9 @@
try:
child = self.__getChildFromPid(vmId)
except:
- log.error("Failed to get child info; transportCookie = %s; hostname = %s" % (str(cPickle.loads(transportCookie)), socket.hostname()))
+ # XXXstroucki: Does hostname contain the peer hostname?
+ log.error("Failed to get child info; transportCookie = %s; hostname = %s" %
+ (str(cPickle.loads(transportCookie)), _hostname))
raise
try:
self.__getPtyInfo(child, True)
@@ -808,6 +855,11 @@
threading.Thread(target=controlConsole, args=(child,consolePort)).start()
return "Control console listening on %s:%d" % (hostname, consolePort)
+ def __specificReset(self, vmId):
+ child = self.__getChildFromPid(vmId)
+ self.__enterCommand(child, "system_reset")
+ return "Sent reset signal to instance"
+
# extern
def vmmSpecificCall(self, vmId, arg):
arg = arg.lower()
@@ -826,12 +878,16 @@
elif (arg == "startconsole"):
return self.__specificStartConsole(vmId)
+ elif (arg == "reset"):
+ return self.__specificReset(vmId)
+
elif (arg == "list"):
commands = [
"startVnc",
"stopVnc",
"changeCdrom:<image.iso>",
"startConsole",
+ "reset",
]
return "\n".join(commands)
@@ -842,11 +898,63 @@
def listVms(self):
return self.controlledVMs.keys()
+ def __processVmStats(self, vmId):
+ try:
+ f = open("/proc/%d/stat" % (vmId))
+ procData = f.read()
+ f.close()
+ except:
+ log.warning("Unable to get data for instance %d" % vmId)
+ return
+
+ ws = procData.strip().split()
+ userTicks = float(ws[13])
+ sysTicks = float(ws[14])
+ myTicks = userTicks + sysTicks
+ vsize = (int(ws[22]))/1024.0/1024.0
+ rss = (int(ws[23])*4096)/1024.0/1024.0
+ cpuSeconds = myTicks/self.ticksPerSecond
+ # XXXstroucki be more exact here?
+ last = time.time() - self.statsInterval
+ lastCpuSeconds = self.cpuStats.get(vmId, cpuSeconds)
+ if lastCpuSeconds is None:
+ lastCpuSeconds = cpuSeconds
+ cpuLoad = (cpuSeconds - lastCpuSeconds)/(time.time() - last)
+ self.cpuStats[vmId] = cpuSeconds
+ try:
+ child = self.controlledVMs[vmId]
+ except:
+ log.warning("Unable to obtain information on instance %d" % vmId)
+ return
+
+ (recvMBs, sendMBs, recvBytes, sendBytes) = (0.0, 0.0, 0.0, 0.0)
+ for i in range(0, len(child.instance.nics)):
+ netDev = "%s%d.%d" % (self.ifPrefix, child.instance.id, i)
+ (tmpRecvMBs, tmpSendMBs, tmpRecvBytes, tmpSendBytes) = self.netStats.get(netDev, (0.0, 0.0, 0.0, 0.0))
+ (recvMBs, sendMBs, recvBytes, sendBytes) = (recvMBs + tmpRecvMBs, sendMBs + tmpSendMBs, recvBytes + tmpRecvBytes, sendBytes + tmpSendBytes)
+ self.stats[vmId] = self.stats.get(vmId, {})
+ child = self.controlledVMs.get(vmId, None)
+ if (child):
+ res = self.__enterCommand(child, "info blockstats")
+ for l in res.split("\n"):
+ (device, __sep, data) = stringPartition(l, ": ")
+ if (data != ""):
+ for field in data.split(" "):
+ (label, __sep, val) = stringPartition(field, "=")
+ if (val != ""):
+ self.stats[vmId]['%s_%s_per_s' % (device, label)] = (float(val) - float(self.stats[vmId].get('%s_%s' % (device, label), 0)))/self.statsInterval
+ self.stats[vmId]['%s_%s' % (device, label)] = int(val)
+ self.stats[vmId]['cpuLoad'] = cpuLoad
+ self.stats[vmId]['rss'] = rss
+ self.stats[vmId]['vsize'] = vsize
+ self.stats[vmId]['recvMBs'] = sendMBs
+ self.stats[vmId]['sendMBs'] = recvMBs
+
# thread
def statsThread(self):
- ticksPerSecond = float(os.sysconf('SC_CLK_TCK'))
- netStats = {}
- cpuStats = {}
+ self.ticksPerSecond = float(os.sysconf('SC_CLK_TCK'))
+ self.netStats = {}
+ self.cpuStats = {}
# XXXstroucki be more exact here?
last = time.time() - self.statsInterval
while True:
@@ -857,12 +965,12 @@
f.close()
for l in netData:
if (l.find(self.ifPrefix) != -1):
- (dev, sep, ld) = stringPartition(l, ":")
+ (dev, __sep, ld) = stringPartition(l, ":")
dev = dev.strip()
ws = ld.split()
recvBytes = float(ws[0])
sendBytes = float(ws[8])
- (recvMBs, sendMBs, lastRecvBytes, lastSendBytes) = netStats.get(dev, (0.0, 0.0, recvBytes, sendBytes))
+ (recvMBs, sendMBs, lastRecvBytes, lastSendBytes) = self.netStats.get(dev, (0.0, 0.0, recvBytes, sendBytes))
if (recvBytes < lastRecvBytes):
# We seem to have overflowed
# XXXstroucki How likely is this to happen?
@@ -878,44 +986,12 @@
lastSendBytes = lastSendBytes - 2**32
recvMBs = (recvBytes-lastRecvBytes)/(now-last)/1024.0/1024.0
sendMBs = (sendBytes-lastSendBytes)/(now-last)/1024.0/1024.0
- netStats[dev] = (recvMBs, sendMBs, recvBytes, sendBytes)
+ self.netStats[dev] = (recvMBs, sendMBs, recvBytes, sendBytes)
+
+
for vmId in self.controlledVMs:
- f = open("/proc/%d/stat" % (vmId))
- procData = f.read()
- f.close()
- ws = procData.strip().split()
- userTicks = float(ws[13])
- sysTicks = float(ws[14])
- myTicks = userTicks + sysTicks
- vsize = (int(ws[22]))/1024.0/1024.0
- rss = (int(ws[23])*4096)/1024.0/1024.0
- cpuSeconds = myTicks/ticksPerSecond
- lastCpuSeconds = cpuStats.get(vmId, cpuSeconds)
- cpuLoad = (cpuSeconds - lastCpuSeconds)/(now - last)
- cpuStats[vmId] = cpuSeconds
- child = self.controlledVMs[vmId]
- (recvMBs, sendMBs, recvBytes, sendBytes) = (0.0, 0.0, 0.0, 0.0)
- for i in range(0, len(child.instance.nics)):
- netDev = "%s%d.%d" % (self.ifPrefix, child.instance.id, i)
- (tmpRecvMBs, tmpSendMBs, tmpRecvBytes, tmpSendBytes) = netStats.get(netDev, (0.0, 0.0, 0.0, 0.0))
- (recvMBs, sendMBs, recvBytes, sendBytes) = (recvMBs + tmpRecvMBs, sendMBs + tmpSendMBs, recvBytes + tmpRecvBytes, sendBytes + tmpSendBytes)
- self.stats[vmId] = self.stats.get(vmId, {})
- child = self.controlledVMs.get(vmId, None)
- if (child):
- res = self.__enterCommand(child, "info blockstats")
- for l in res.split("\n"):
- (device, sep, data) = stringPartition(l, ": ")
- if (data != ""):
- for field in data.split(" "):
- (label, sep, val) = stringPartition(field, "=")
- if (val != ""):
- self.stats[vmId]['%s_%s_per_s' % (device, label)] = (float(val) - float(self.stats[vmId].get('%s_%s' % (device, label), 0)))/self.statsInterval
- self.stats[vmId]['%s_%s' % (device, label)] = int(val)
- self.stats[vmId]['cpuLoad'] = cpuLoad
- self.stats[vmId]['rss'] = rss
- self.stats[vmId]['vsize'] = vsize
- self.stats[vmId]['recvMBs'] = sendMBs
- self.stats[vmId]['sendMBs'] = recvMBs
+ self.__processVmStats(vmId)
+
except:
log.exception("statsThread threw an exception")
last = now
diff --git a/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py b/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py
index cd4fde8..19447f4 100644
--- a/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py
+++ b/src/tashi/nodemanager/vmcontrol/vmcontrolinterface.py
@@ -28,8 +28,8 @@
self.dfs = dfs
self.nm = nm
- def getInstances(self):
- """Will return a dict of instances by vmId to the caller"""
+ def getInstances(self):
+ """Will return a dict of instances by vmId to the caller"""
raise NotImplementedError
def instantiateVm(self, instance):
diff --git a/src/tashi/nodemanager/vmcontrol/xenpv.py b/src/tashi/nodemanager/vmcontrol/xenpv.py
index 8bf4a29..f0e9c06 100644
--- a/src/tashi/nodemanager/vmcontrol/xenpv.py
+++ b/src/tashi/nodemanager/vmcontrol/xenpv.py
@@ -15,37 +15,35 @@
# specific language governing permissions and limitations
# under the License.
-import os
import os.path
import cPickle
-import subprocess # FIXME: should switch os.system to this
+import subprocess
import time
import threading
import logging
import socket
from vmcontrolinterface import VmControlInterface
-from tashi.rpycservices.rpyctypes import Errors, InstanceState, TashiException
+from tashi.rpycservices.rpyctypes import InstanceState
from tashi.rpycservices.rpyctypes import Instance, Host
-from tashi import boolean, convertExceptions, ConnectionManager, version
-from tashi.util import isolatedRPC, broken
+from tashi import version
+from tashi.util import broken
-import tashi.parallel
-from tashi.parallel import synchronized, synchronizedmethod
+from tashi.parallel import synchronizedmethod
log = logging.getLogger(__file__)
# FIXME: these should throw errors on failure
def domIdToName(domid):
# XXXpipe: get domain name from id
- f = os.popen("/usr/sbin/xm domname %i"%domid)
+ f = os.popen("/usr/sbin/xm domname %i"% domid)
name = f.readline().strip()
f.close()
return name
def domNameToId(domname):
# XXXpipe: get domain id from name
- f = os.popen("/usr/sbin/xm domid %s"%domname)
+ f = os.popen("/usr/sbin/xm domid %s"% domname)
name = f.readline().strip()
f.close()
return int(name)
@@ -55,10 +53,10 @@
if domname[0:(len(prefix))] != prefix:
return None
try:
- id = int(domname[len(prefix):])
+ _id = int(domname[len(prefix):])
except:
return None
- return id
+ return _id
# Try to do a listVms call using info from xend
@@ -80,12 +78,12 @@
vminfo[fields[i]] = line[i]
# if the name begins with our prefix, get the id,
# otherwise skip this record
- id = nameToId(vminfo['name'], prefix)
- if id == None:
+ _id = nameToId(vminfo['name'], prefix)
+ if _id == None:
continue
# fill in the instance object
- instance.id = int(id)
+ instance.id = int(_id)
instance.vmId = int(vminfo['vmId'])
instance.state = InstanceState.Running
if(vminfo['state'][2] !='-'):
@@ -142,8 +140,8 @@
self.nm.vmStateChange(a.vmId, a.state, InstanceState.Exited)
for vmId in vmlist.keys():
if not self.newvms.has_key(vmId):
+ # FIXME: log this
print 'WARNING: found vm that should be managed, but is not'
- # FIXME: log that
def run(self):
@@ -155,7 +153,7 @@
# a lot easier
########################################
def createXenConfig(self, vmName,
- image, macAddr, netID, memory, cores, hints, id):
+ image, macAddr, netID, memory, cores, hints, _id):
bootstr = None
rootconfig = None
diskconfig = None
@@ -168,9 +166,10 @@
vmType = hints.get('vmtype', self.defaultVmType)
print 'starting vm with type: ', vmType
- disk0 = 'tap:%s' % self.disktype
+ disk0 = 'tap:%s' % self.disktype
diskU = 'xvda1'
+ # XXXstroucki: use soft config
try:
bridgeformat = self.config.get('XenPV', 'defaultBridgeFormat')
except:
@@ -313,10 +312,10 @@
@synchronizedmethod
def instantiateVm(self, instance):
- try:
- disktype = self.config.get('XenPV', 'defaultDiskType')
- except:
- disktype = 'vhd'
+ try:
+ disktype = self.config.get('XenPV', 'defaultDiskType')
+ except:
+ disktype = 'vhd'
# FIXME: this is NOT the right way to get out hostId
self.hostId = instance.hostId
@@ -346,6 +345,8 @@
instance.disks[i].local = newdisk
+ # XXXstroucki if ever supporting multiple nics,
+ # ensure more than one isn't put on the same network.
fn = self.createXenConfig(name,
instance.disks[0].local,
instance.nics[0].mac,
@@ -358,9 +359,9 @@
r = os.system(cmd)
# self.deleteXenConfig(name)
if r != 0:
+ # FIXME: log/handle error
print 'WARNING: "%s" returned %i' % ( cmd, r)
raise Exception, 'WARNING: "%s" returned %i' % ( cmd, r)
- # FIXME: log/handle error
vmId = domNameToId(name)
self.newvms[vmId] = instance
instance.vmId = vmId
@@ -385,7 +386,7 @@
instance = self.newvms[vmId]
instance.suspendCookie = suspendCookie
infof = self.dfs.open(infofile, "w")
- name = domIdToName(vmId)
+ #name = domIdToName(vmId)
cPickle.dump(instance, infof)
infof.close()
@@ -414,7 +415,7 @@
self.dfs.unlink(infofile)
self.dfs.copyFrom(source, tmpfile)
- r = os.system("/usr/sbin/xm restore %s"%(tmpfile))
+ __r = os.system("/usr/sbin/xm restore %s"%(tmpfile))
os.unlink(tmpfile)
# FIXME: if the vmName function changes, suspended vms will become invalid
diff --git a/src/tashi/parallel.py b/src/tashi/parallel.py
index 09fe57e..03832bd 100644
--- a/src/tashi/parallel.py
+++ b/src/tashi/parallel.py
@@ -34,7 +34,8 @@
def __init__(self, size=8, maxsize=0):
Queue.Queue.__init__(self, maxsize)
for i in range(size):
- thread = threading.Thread(target=self._worker)
+ name = "parallel.ThreadPool#%s" % (i)
+ thread = threading.Thread(name=name, target=self._worker)
thread.setDaemon(True)
thread.start()
def _worker(self):
@@ -129,9 +130,9 @@
time.sleep(sleep)
queue.put(None)
tt = time.time()
- for i in range(4):
+ for _ in range(4):
slowfunc()
- for i in range(4):
+ for _ in range(4):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 4, 1)
@@ -143,9 +144,9 @@
time.sleep(sleep)
queue.put(None)
tt = time.time()
- for i in range(8):
+ for _ in range(8):
slowthreadfunc()
- for i in range(8):
+ for _ in range(8):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 1, 1)
@@ -158,9 +159,9 @@
time.sleep(sleep)
queue.put(None)
tt = time.time()
- for i in range(8):
+ for _ in range(8):
slowpoolfunc()
- for i in range(8):
+ for _ in range(8):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 2, 1)
@@ -175,9 +176,9 @@
queue.put(None)
sc = slowclass()
tt = time.time()
- for i in range(4):
+ for _ in range(4):
sc.beslow()
- for i in range(4):
+ for _ in range(4):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 4, 1)
@@ -193,9 +194,9 @@
queue.put(None)
sc = slowclass()
tt = time.time()
- for i in range(4):
+ for _ in range(4):
sc.beslow()
- for i in range(4):
+ for _ in range(4):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 1, 1)
@@ -211,9 +212,9 @@
queue.put(None)
sc = slowclass()
tt = time.time()
- for i in range(16):
+ for _ in range(16):
sc.beslow()
- for i in range(16):
+ for _ in range(16):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 2, 1)
@@ -228,9 +229,9 @@
def slowthreadfunc():
addtoqueue()
tt = time.time()
- for i in range(4):
+ for _ in range(4):
slowthreadfunc()
- for i in range(4):
+ for _ in range(4):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 4, 1)
@@ -254,10 +255,10 @@
def slowthreadfunc2():
atc.addtoqueue2()
tt = time.time()
- for i in range(4):
+ for _ in range(4):
slowthreadfunc1()
slowthreadfunc2()
- for i in range(8):
+ for _ in range(8):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 8, 1)
@@ -279,10 +280,10 @@
def slowthreadfunc2():
atc.addtoqueue2()
tt = time.time()
- for i in range(4):
+ for _ in range(4):
slowthreadfunc1()
slowthreadfunc2()
- for i in range(8):
+ for _ in range(8):
queue.get()
tt = time.time() - tt
self.assertAlmostEqual(tt, 1, 1)
diff --git a/src/tashi/rpycservices/rpycservices.py b/src/tashi/rpycservices/rpycservices.py
index c66a40e..65460e3 100644
--- a/src/tashi/rpycservices/rpycservices.py
+++ b/src/tashi/rpycservices/rpycservices.py
@@ -19,7 +19,7 @@
from tashi.rpycservices.rpyctypes import Instance, Host, User
import cPickle
-clusterManagerRPCs = ['createVm', 'shutdownVm', 'destroyVm', 'suspendVm', 'resumeVm', 'migrateVm', 'pauseVm', 'unpauseVm', 'getHosts', 'getNetworks', 'getUsers', 'getInstances', 'vmmSpecificCall', 'registerNodeManager', 'vmUpdate', 'activateVm', 'registerHost', 'getImages', 'copyImage']
+clusterManagerRPCs = ['createVm', 'shutdownVm', 'destroyVm', 'suspendVm', 'resumeVm', 'migrateVm', 'pauseVm', 'unpauseVm', 'getHosts', 'getNetworks', 'getUsers', 'getInstances', 'vmmSpecificCall', 'registerNodeManager', 'vmUpdate', 'activateVm', 'registerHost', 'getImages', 'copyImage', 'setHostState']
nodeManagerRPCs = ['instantiateVm', 'shutdownVm', 'destroyVm', 'suspendVm', 'resumeVm', 'prepReceiveVm', 'prepSourceVm', 'migrateVm', 'receiveVm', 'pauseVm', 'unpauseVm', 'getVmInfo', 'listVms', 'vmmSpecificCall', 'getHostInfo', 'liveCheck']
accountingRPCs = ['record']
@@ -65,6 +65,9 @@
if name not in clusterManagerRPCs and name not in nodeManagerRPCs and name not in accountingRPCs:
return None
def connectWrap(*args):
+ # XXXstroucki: why not talk directly, instead
+ # of using rpyc? We're already using pickle to move
+ # args.
args = cPickle.dumps(clean(args))
try:
res = getattr(self.conn.root, name)(args)
diff --git a/src/tashi/rpycservices/rpyctypes.py b/src/tashi/rpycservices/rpyctypes.py
index e4d613f..2d65928 100644
--- a/src/tashi/rpycservices/rpyctypes.py
+++ b/src/tashi/rpycservices/rpyctypes.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# XXXstroucki: shouldn't this be tashitypes.py instead?
+
class Errors(object):
ConvertedException = 1
NoSuchInstanceId = 2
diff --git a/src/tashi/thrift/build.py b/src/tashi/thrift/build.py
deleted file mode 100755
index 42b22fa..0000000
--- a/src/tashi/thrift/build.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import shutil
-import os
-from os import path
-import re
-
-if __name__ == '__main__':
- if (path.exists('gen-py')):
- print 'Removing \'gen-py\' directory...'
- shutil.rmtree('gen-py')
-
- if (path.exists('../services')):
- print 'Removing \'../services\' directory...'
- shutil.rmtree('../services')
-
- if (path.exists('../messaging/messagingthrift')):
- print 'Removing \'../messaging/messagingthrift\' directory...'
- shutil.rmtree('../messaging/messagingthrift')
-
- print 'Generating Python code for \'services.thrift\'...'
- os.system('thrift --gen py:new_style services.thrift')
-
- print 'Copying generated code to \'tashi.services\' package...'
- shutil.copytree('gen-py/services', '../services')
-
- print 'Generatign Python code for \'messagingthrift\'...'
- os.system('rm -rf gen-py')
- os.system('thrift --gen py messagingthrift.thrift')
-
- print 'Copying generated code to \'tashi.messaging.messagingthrift\' package...'
- shutil.copytree(os.path.join('gen-py', 'messagingthrift'),
- os.path.join('..', 'messaging', 'messagingthrift'))
-
- print 'Generating Python code for \'layoutlocality.thrift\'...'
- os.system('thrift --gen py:new_style layoutlocality.thrift')
-
- print 'Copying generated code to \'tashi.services\' package...'
- shutil.copytree('gen-py/layoutlocality', '../services/layoutlocality')
diff --git a/src/tashi/thrift/layoutlocality.thrift b/src/tashi/thrift/layoutlocality.thrift
deleted file mode 100644
index e14910c..0000000
--- a/src/tashi/thrift/layoutlocality.thrift
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-struct BlockLocation {
- list<string> hosts, // hostnames of data nodes
- list<i32> ports, // ports for data nodes
- list<string> names, // hostname:port of data nodes
- i64 blocknum,
- i64 offset,
- i64 length
-}
-
-struct Pathname {
- string pathname
-}
-
-exception FileNotFoundException {
- string message
-}
-
-service layoutservice {
- list <BlockLocation> getFileBlockLocations(1:Pathname path, 2:i64 offset, 3:i64 length)
- throws (1:FileNotFoundException ouch),
-}
-
-service localityservice {
- list <list<double>> getHopCountMatrix(1:list<string> sourceHosts, 2:list<string> destHosts),
-}
diff --git a/src/tashi/thrift/messagingthrift.thrift b/src/tashi/thrift/messagingthrift.thrift
deleted file mode 100644
index 401e9a1..0000000
--- a/src/tashi/thrift/messagingthrift.thrift
+++ /dev/null
@@ -1,36 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-typedef map<string, string> strstrmap
-
-service SubscriberThrift{
- # the async keyword seems to slow things down in the simple
- # tests. However, with non-trivial subscribers it will be
- # necessary to use async here.
- async void publish(strstrmap message)
- async void publishList(list<strstrmap> messages)
-}
-
-service MessageBrokerThrift{
- void log(strstrmap message),
- void addSubscriber(string host, i16 port)
- void removeSubscriber(string host, i16 port)
- async void publish(strstrmap message)
- async void publishList(list<strstrmap> messages)
-
-}
-
diff --git a/src/tashi/thrift/services.thrift b/src/tashi/thrift/services.thrift
deleted file mode 100644
index fa29c30..0000000
--- a/src/tashi/thrift/services.thrift
+++ /dev/null
@@ -1,166 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-enum Errors {
- ConvertedException = 1,
- NoSuchInstanceId = 2,
- NoSuchVmId = 3,
- IncorrectVmState = 4,
- NoSuchHost = 5,
- NoSuchHostId = 6,
- InstanceIdAlreadyExists = 7,
- HostNameMismatch = 8,
- HostNotUp = 9,
- HostStateError = 10,
- InvalidInstance = 11,
- UnableToResume = 12,
- UnableToSuspend = 13,
-}
-
-enum InstanceState {
- Pending = 1, // Job submitted
- Activating = 2, // activateVm has been called, but instantiateVm hasn't finished yet
- Running = 3, // Normal state
- Pausing = 4, // Beginning pause sequence
- Paused = 5 // Paused
- Unpausing = 6, // Beginning unpause sequence
- Suspending = 7, // Beginning suspend sequence
- Resuming = 8, // Beginning resume sequence
- MigratePrep = 9, // Migrate state #1
- MigrateTrans = 10, // Migrate state #2
- ShuttingDown = 11, // Beginning exit sequence
- Destroying = 12, // Beginning exit sequence
- Orphaned = 13, // Host is missing
- Held = 14, // Activation failed
- Exited = 15, // VM has exited
- Suspended = 16, // VM is suspended
-}
-
-enum HostState {
- Normal = 1,
- Drained = 2,
- VersionMismatch = 3
-}
-
-exception TashiException {
- 1: Errors errno
- 2: string msg
-}
-
-struct Host {
- 1:i32 id,
- 2:string name,
- 3:bool up,
- 4:bool decayed,
- 5:HostState state,
- 6:i32 memory,
- 7:i32 cores,
- 8:string version
- // Other properties (disk?)
-}
-
-struct Network {
- 1:i32 id
- 2:string name
-}
-
-struct User {
- 1:i32 id,
- 2:string name
-}
-
-struct DiskConfiguration {
- 1:string uri,
- 2:bool persistent
-}
-
-struct NetworkConfiguration {
- 1:i32 network,
- 2:string mac,
- 3:string ip
-}
-
-struct Instance {
- 1:i32 id,
- 2:i32 vmId,
- 3:i32 hostId,
- 4:bool decayed,
- 5:InstanceState state,
- 6:i32 userId,
- 7:string name, // User specified
- 8:i32 cores, // User specified
- 9:i32 memory, // User specified
- 10:list<DiskConfiguration> disks, // User specified
- 11:list<NetworkConfiguration> nics // User specified
- 12:map<string, string> hints // User specified
-}
-
-service clustermanagerservice {
- // Client-facing RPCs
- Instance createVm(1:Instance instance) throws (1:TashiException e)
-
- void shutdownVm(1:i32 instanceId) throws (1:TashiException e)
- void destroyVm(1:i32 instanceId) throws (1:TashiException e)
-
- void suspendVm(1:i32 instanceId) throws (1:TashiException e)
- Instance resumeVm(1:i32 instanceId) throws (1:TashiException e)
-
- void migrateVm(1:i32 instanceId, 2:i32 targetHostId) throws (1:TashiException e)
-
- void pauseVm(1:i32 instanceId) throws (1:TashiException e)
- void unpauseVm(1:i32 instanceId) throws (1:TashiException e)
-
- list<Host> getHosts() throws (1:TashiException e)
- list<Network> getNetworks() throws (1:TashiException e)
- list<User> getUsers() throws (1:TashiException e)
-
- list<Instance> getInstances() throws (1:TashiException e)
-
- string vmmSpecificCall(1:i32 instanceId, 2:string arg) throws (1:TashiException e)
-
- // NodeManager-facing RPCs
- i32 registerNodeManager(1:Host host, 2:list<Instance> instances) throws (1:TashiException e)
- void vmUpdate(1:i32 instanceId, 2:Instance instance, 3:InstanceState old) throws (1:TashiException e)
-
- // Agent-facing RPCs
- void activateVm(1:i32 instanceId, 2:Host host) throws (1:TashiException e)
-}
-
-service nodemanagerservice {
- // ClusterManager-facing RPCs
- i32 instantiateVm(1:Instance instance) throws (1:TashiException e)
-
- void shutdownVm(1:i32 vmId) throws (1:TashiException e)
- void destroyVm(1:i32 vmId) throws (1:TashiException e)
-
- void suspendVm(1:i32 vmId, 2:string destination) throws (1:TashiException e)
- i32 resumeVm(1:Instance instance, 2:string source) throws (1:TashiException e)
-
- string prepReceiveVm(1:Instance instance, 2:Host source) throws (1:TashiException e)
- void migrateVm(1:i32 vmId, 2:Host target, 3:string transportCookie) throws (1:TashiException e)
- void receiveVm(1:Instance instance, 2:string transportCookie) throws (1:TashiException e)
-
- void pauseVm(1:i32 vmId) throws (1:TashiException e)
- void unpauseVm(1:i32 vmId) throws (1:TashiException e)
-
- Instance getVmInfo(1:i32 vmId) throws (1:TashiException e)
- list<i32> listVms() throws (1:TashiException e)
-
- string vmmSpecificCall(1:i32 vmId, 2:string arg) throws (1:TashiException e)
-
- // Host getHostInfo() throws (1:TashiException e)
-}
diff --git a/src/tashi/util.py b/src/tashi/util.py
index 4eb0981..c2bd6b8 100644
--- a/src/tashi/util.py
+++ b/src/tashi/util.py
@@ -15,6 +15,9 @@
# specific language governing permissions and limitations
# under the License.
+#XXXstroucki: for compatibility with python 2.5
+from __future__ import with_statement
+
import ConfigParser
#import cPickle
import os
@@ -22,14 +25,15 @@
import signal
#import struct
import sys
-import threading
+#import threading
import time
import traceback
import types
-import getpass
+import functools
from tashi.rpycservices import rpycservices
from tashi.rpycservices.rpyctypes import TashiException, Errors, InstanceState, HostState
+from tashi.utils.timeout import *
def broken(oldFunc):
"""Decorator that is used to mark a function as temporarily broken"""
@@ -85,14 +89,14 @@
return res
return newFunc
-def editAndContinue(file, mod, name):
+def editAndContinue(filespec, mod, name):
def wrapper(oldFunc):
persist = {}
persist['lastMod'] = time.time()
persist['oldFunc'] = oldFunc
persist['func'] = oldFunc
def newFunc(*args, **kw):
- modTime = os.stat(file)[8]
+ modTime = os.stat(filespec)[8]
if (modTime > persist['lastMod']):
persist['lastMod'] = modTime
space = {}
@@ -148,14 +152,6 @@
def __delattr__(self, name):
return delattr(self.__dict__['__real_obj__'], name)
-def isolatedRPC(client, method, *args, **kw):
- """Opens and closes a thrift transport for a single RPC call"""
- if (not client._iprot.trans.isOpen()):
- client._iprot.trans.open()
- res = getattr(client, method)(*args, **kw)
- client._iprot.trans.close()
- return res
-
def signalHandler(signalNumber):
"""Used to denote a particular function as the signal handler for a
specific signal"""
@@ -170,6 +166,13 @@
return value
if (type(value) == types.IntType):
return (value != 0)
+
+ # See if it can be expressed as a string
+ try:
+ value = str(value)
+ except:
+ raise ValueError
+
lowercaseValue = value.lower()
if lowercaseValue in ['yes', 'true', '1']:
return True
@@ -186,13 +189,14 @@
cmd = "import %s\n" % (package)
else:
cmd = ""
- cmd += "obj = %s(*args)\n" % (className)
+ cmd += "_obj = %s(*args)\n" % (className)
exec cmd in locals()
- return obj
+ # XXXstroucki: this is correct, even though pydev complains
+ return _obj
def convertExceptions(oldFunc):
"""This converts any exception type into a TashiException so that
- it can be passed over a Thrift RPC"""
+ it can be passed over an RPC"""
def newFunc(*args, **kw):
try:
return oldFunc(*args, **kw)
@@ -218,31 +222,46 @@
raise Exception("No config file could be found: %s" % (str(allLocations)))
return (config, configFiles)
+def __getShellFn():
+ try:
+ from IPython.Shell import IPShellEmbed
+ return (1, IPShellEmbed)
+ except ImportError:
+ import IPython
+ return (2, IPython.embed)
+
def debugConsole(globalDict):
"""A debugging console that optionally uses pysh"""
def realDebugConsole(globalDict):
try :
import atexit
- from IPython.Shell import IPShellEmbed
+ (calltype, shellfn) = __getShellFn()
def resetConsole():
# XXXpipe: make input window sane
- (stdin, stdout) = os.popen2("reset")
+ (__stdin, stdout) = os.popen2("reset")
stdout.read()
- dbgshell = IPShellEmbed()
atexit.register(resetConsole)
- dbgshell(local_ns=globalDict, global_ns=globalDict)
- except Exception:
+ if calltype == 1:
+ dbgshell=shellfn(user_ns=globalDict)
+ dbgshell()
+ elif calltype == 2:
+ dbgshell=shellfn
+ dbgshell(user_ns=globalDict)
+ except Exception, e:
CONSOLE_TEXT=">>> "
- input = " "
- while (input != ""):
+ inputline = " "
+ while (inputline != ""):
sys.stdout.write(CONSOLE_TEXT)
- input = sys.stdin.readline()
+ inputline = sys.stdin.readline()
try:
- exec(input) in globalDict
+ exec(inputline) in globalDict
except Exception, e:
sys.stdout.write(str(e) + "\n")
+
+ os._exit(0)
+
if (os.getenv("DEBUG", "0") == "1"):
- threading.Thread(target=lambda: realDebugConsole(globalDict)).start()
+ threading.Thread(name="debugConsole", target=lambda: realDebugConsole(globalDict)).start()
def stringPartition(s, field):
index = s.find(field)
@@ -260,6 +279,82 @@
ns = ns + c
return ns
+class Connection:
+
+ def __init__(self, host, port, authAndEncrypt=False, credentials=None):
+ self.host = host
+ self.port = port
+ self.credentials = credentials
+ self.authAndEncrypt = authAndEncrypt
+ self.connection = None
+ # XXXstroucki some thing may still depend on this (client)
+ self.username = None
+ if credentials is not None:
+ self.username = credentials[0]
+
+ def __connect(self):
+ # create new connection
+
+ username = None
+ password = None
+
+ if self.credentials is not None:
+ username = self.credentials[0]
+ password = self.credentials[1]
+
+ if self.authAndEncrypt:
+ if username is None:
+ username = raw_input("Enter Username:")
+
+ if password is None:
+ password = raw_input("Enter Password:")
+
+ if self.credentials != (username, password):
+ self.credentials = (username, password)
+
+ client = rpycservices.client(self.host, self.port, username=username, password=password)
+ else:
+ client = rpycservices.client(self.host, self.port)
+
+ self.connection = client
+
+
+ def __do(self, name, *args, **kwargs):
+ if self.connection is None:
+ self.__connect()
+
+ threadname = "%s:%s" % (self.host, self.port)
+ # XXXstroucki: Use 10 second timeout, ok?
+ # XXXstroucki: does this fn touch the network?
+ t = TimeoutThread(getattr, (self.connection, name, None))
+ threading.Thread(name=threadname, target=t.run).start()
+
+ try:
+ remotefn = t.wait(timeout=10)
+ except TimeoutException:
+ self.connection = None
+ raise
+
+ try:
+ if callable(remotefn):
+ # XXXstroucki: Use 10 second timeout, ok?
+ t = TimeoutThread(remotefn, args, kwargs)
+ threading.Thread(name=threadname, target=t.run).start()
+ returns = t.wait(timeout=10.0)
+
+ else:
+ raise TashiException({'msg':'%s not callable' % name})
+
+ except:
+ self.connection = None
+ raise
+
+ return returns
+
+ def __getattr__(self, name):
+ return functools.partial(self.__do, name)
+
+
def createClient(config):
cfgHost = config.get('Client', 'clusterManagerHost')
cfgPort = config.get('Client', 'clusterManagerPort')
@@ -273,14 +368,12 @@
authAndEncrypt = boolean(config.get('Security', 'authAndEncrypt'))
if authAndEncrypt:
username = config.get('AccessClusterManager', 'username')
- if username == '':
- username = raw_input('Enter Username:')
password = config.get('AccessClusterManager', 'password')
- if password == '':
- password = getpass.getpass('Enter Password:')
- client = rpycservices.client(host, port, username=username, password=password)
+ client = Connection(host, port, authAndEncrypt, (username, password))
+
else:
- client = rpycservices.client(host, port)
+ client = Connection(host, port)
+
return client
def enumToStringDict(cls):
diff --git a/src/utils/Makefile b/src/tashi/utils/__init__.py
similarity index 92%
rename from src/utils/Makefile
rename to src/tashi/utils/__init__.py
index aea56ee..c33c252 100644
--- a/src/utils/Makefile
+++ b/src/tashi/utils/__init__.py
@@ -15,10 +15,3 @@
# specific language governing permissions and limitations
# under the License.
-all: nmd
-
-clean:
- rm -f ./nmd
-
-nmd: nmd.c
- ${CC} $< -o $@
diff --git a/src/tashi/utils/config.py b/src/tashi/utils/config.py
new file mode 100644
index 0000000..0843b1a
--- /dev/null
+++ b/src/tashi/utils/config.py
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrapper class for python configuration
+
+class Config:
+ def __init__(self, additionalNames=[], additionalFiles=[]):
+ from tashi.util import getConfig
+ (config, files) = getConfig(additionalNames = additionalNames, additionalFiles = additionalFiles)
+ self.config = config
+ self.files = files
+
+ def getFiles(self):
+ return self.files
+
+ def get(self, section, option, default = None):
+ # soft version of self.config.get. Returns configured
+ # value or default value (if specified) or None.
+ import ConfigParser
+
+ value = default
+ try:
+ value = self.config.get(section, option)
+ except ConfigParser.NoOptionError:
+ pass
+
+ return value
+
+ def getint(self, section, option, default = None):
+ # soft version of self.config.getint. Returns configured
+ # value forced to int or default value (as and if specified)
+ # or None.
+ import ConfigParser
+
+ value = default
+ try:
+ value = self.config.get(section, option)
+ value = int(value)
+ except ConfigParser.NoOptionError:
+ pass
+
+ return value
+
+ def items(self, *args, **kwargs):
+ return self.config.items(*args, **kwargs)
diff --git a/src/tashi/agents/pseudoDes.py b/src/tashi/utils/pseudoDes.py
similarity index 96%
rename from src/tashi/agents/pseudoDes.py
rename to src/tashi/utils/pseudoDes.py
index 3d3bf0b..6d7a800 100755
--- a/src/tashi/agents/pseudoDes.py
+++ b/src/tashi/utils/pseudoDes.py
@@ -1,5 +1,4 @@
-#! /usr/bin/env python
-
+#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -17,6 +16,8 @@
# specific language governing permissions and limitations
# under the License.
+# XXXstroucki: why pseudo?
+
values = {1:(0xcba4e531, 0x12be4590),
2:(0x537158eb, 0xab54ce58),
3:(0x145cdc3c, 0x6954c7a6),
@@ -26,7 +27,7 @@
short = short & 0xffff
char = char & 0xff
value = short ^ (char << 8)
- for i in range(0, 8):
+ for __i in range(0, 8):
if value & 0x8000:
value = (value << 1) ^ 4129
else:
diff --git a/src/tashi/utils/timeout.py b/src/tashi/utils/timeout.py
new file mode 100644
index 0000000..cc9d850
--- /dev/null
+++ b/src/tashi/utils/timeout.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# module to provide a thread timeout monitor
+# by Alexey Tumanov and Michael Stroucken
+
+import threading
+
+class TimeoutException(Exception):
+ def __init__(self, string):
+ Exception.__init__(self,'Timeout: %s' % string)
+
+class TimeoutThread:
+ def __init__(self, function, args = (), kwargs = {}):
+ self.cv = threading.Condition()
+ self.function = function
+ self.args = args
+ self.kwargs = kwargs
+ self.finished = False
+ self.rval = None
+
+ def wait(self, timeout=None):
+ self.cv.acquire()
+ if not self.finished:
+ if timeout:
+ self.cv.wait(timeout)
+ else:
+ self.cv.wait()
+ finished = self.finished
+ rval = self.rval
+ self.cv.release()
+
+ #
+ # Raise an exception if a timeout occurred.
+ #
+ if finished:
+ return rval
+ else: # NOTE: timeout must be set for this to be true.
+ raise TimeoutException("function %s timed out after %f seconds" % (str(self.function), timeout))
+
+ def run(self):
+ try:
+ rval = self.function(*self.args, **self.kwargs)
+ except Exception, e:
+ rval = e
+
+ self.cv.acquire()
+ self.finished = True
+ self.rval = rval
+ self.cv.notify()
+ self.cv.release()
+
diff --git a/src/utils/getLocality.py b/src/utils/getLocality.py
deleted file mode 100755
index 49ecb11..0000000
--- a/src/utils/getLocality.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import sys
-import os
-from os import system
-
-import tashi.services.layoutlocality.localityservice as localityservice
-
-from thrift import Thrift
-from thrift.transport import TSocket
-from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
-
-from tashi.util import getConfig
-
-(config, configFiles) = getConfig(["Client"])
-host = config.get('LocalityService', 'host')
-port = int(config.get('LocalityService', 'port'))
-
-socket = TSocket.TSocket(host, port)
-transport = TTransport.TBufferedTransport(socket)
-protocol = TBinaryProtocol.TBinaryProtocol(transport)
-client = localityservice.Client(protocol)
-transport.open()
-
-while True:
- line1 = "\n"
- line2 = "\n"
- while line1 != "":
- line1 = sys.stdin.readline()
- if line1 == "":
- sys.exit(0)
- if line1 != "\n":
- break
- line1 = line1.strip()
- while line2 != "":
- line2 = sys.stdin.readline()
- if line2 == "":
- sys.exit(0)
- if line2 != "\n":
- break
- line2 = line2.strip()
-
- sources = line1.split(" ")
- destinations = line2.split(" ")
-
- mat = client.getHopCountMatrix(sources, destinations)
- for r in mat:
- for c in r:
- print '%f\t'%c,
- print '\n',
- print '\n',
diff --git a/src/utils/nmd.c b/src/utils/nmd.c
deleted file mode 100644
index effa1d2..0000000
--- a/src/utils/nmd.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <dirent.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-
-#define SLEEP_INTERVAL 10
-#define TASHI_PATH "/usr/local/tashi/"
-#define LOG_FILE "/var/log/nodemanager.log"
-
-/* This function changes (on Linux!) its oom scoring, to make it
- * unattractive to kill
- */
-
-void make_invincible()
-{
- int oom_adj_fd;
- int r;
-
- oom_adj_fd = open("/proc/self/oom_adj", O_WRONLY);
- assert(oom_adj_fd != -1);
- r = write(oom_adj_fd, "-17\n", 4);
- assert(r == 4);
- close(oom_adj_fd);
-
-}
-
-/* This function resets (on Linux!) its oom scoring to default
- */
-void make_vulnerable()
-{
- int oom_adj_fd;
- int r;
-
- oom_adj_fd = open("/proc/self/oom_adj", O_WRONLY);
- assert(oom_adj_fd != -1);
- r = write(oom_adj_fd, "0\n", 2);
- assert(r == 2);
- close(oom_adj_fd);
-}
-
-int main(int argc, char **argv)
-{
- char* env[2];
- int status;
- DIR* d;
- int pid;
- int lfd;
- int foreground=0;
-
-/* If first argument is "-f", run in foreground */
- if ((argc > 1) && (strncmp(argv[1], "-f", 3)==0)) {
- foreground=1;
- }
-/* If not running in foreground, fork off and exit the parent.
- * The child closes its default file descriptors.
- */
- if (!foreground) {
- pid = fork();
- if (pid != 0) {
- exit(0);
- }
- close(0);
- close(1);
- close(2);
- }
-/* Adjust OOM preference */
- make_invincible();
-/* Configure environment of children */
- env[0] = "PYTHONPATH="TASHI_PATH"/src/";
- env[1] = NULL;
- while (1) {
- pid = fork();
- if (pid == 0) {
- /* child */
- /* nodemanagers are vulnerable. Not the supervisor. */
- make_vulnerable();
- if (!foreground) {
- /* If not running fg, open log file */
- lfd = open(LOG_FILE, O_WRONLY|O_APPEND|O_CREAT);
- if (lfd < 0) {
- /* If this failed, open something? */
- lfd = open("/dev/null", O_WRONLY);
- }
- /* Make this fd stdout and stderr */
- dup2(lfd, 2);
- dup2(lfd, 1);
- /* close stdin */
- close(0);
- }
- chdir(TASHI_PATH);
- /* start node manager with python environment */
- execle("./bin/nodemanager.py", "./bin/nodemanager.py", NULL, env);
- exit(-1);
- }
- /* sleep before checking for child's status */
- sleep(SLEEP_INTERVAL);
- /* catch child exiting and go through loop again */
- waitpid(pid, &status, 0);
- } /* while (1) */
-}
diff --git a/src/utils/nmd.py b/src/utils/nmd.py
index e74a82f..118aee8 100755
--- a/src/utils/nmd.py
+++ b/src/utils/nmd.py
@@ -16,9 +16,10 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
"""
+# XXXstroucki: why not use something like supervise instead?
import os
import sys
@@ -36,81 +37,81 @@
*/
"""
def make_invincible():
- # dependent on linux
- try:
- oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
- except IOError:
- pass
- else:
- os.write(oom_adj_fd, "-17\n")
- os.close(oom_adj_fd)
+ # dependent on linux
+ try:
+ oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
+ except IOError:
+ pass
+ else:
+ os.write(oom_adj_fd, "-17\n")
+ os.close(oom_adj_fd)
"""
/* This function resets (on Linux!) its oom scoring to default
*/
"""
def make_vulnerable():
- # dependent on linux
- try:
- oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
- except IOError:
- pass
- else:
- os.write(oom_adj_fd, "0\n")
- os.close(oom_adj_fd)
+ # dependent on linux
+ try:
+ oom_adj_fd = os.open("/proc/self/oom_adj", os.O_WRONLY)
+ except IOError:
+ pass
+ else:
+ os.write(oom_adj_fd, "0\n")
+ os.close(oom_adj_fd)
def main(argv=None):
- if argv is None:
- argv = sys.argv
- try:
- opts, args = getopt.getopt(argv[1:], "f", ["foreground"])
- except getopt.GetoptError, err:
- # print help information and exit:
- print str(err) # will print something like "option -a not recognized"
- # usage()
- return 2
- foreground = False
- for o, a in opts:
- if o in ("-f", "--foreground"):
- foreground = True
- else:
- assert False, "unhandled option"
- if foreground == False:
- pid = os.fork();
- if pid != 0:
- os._exit(0)
- os.close(0)
- os.close(1)
- os.close(2)
+ if argv is None:
+ argv = sys.argv
+ try:
+ opts, args = getopt.getopt(argv[1:], "f", ["foreground"])
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print str(err) # will print something like "option -a not recognized"
+ # usage()
+ return 2
+ foreground = False
+ for o, a in opts:
+ if o in ("-f", "--foreground"):
+ foreground = True
+ else:
+ assert False, "unhandled option"
+ if foreground == False:
+ pid = os.fork();
+ if pid != 0:
+ os._exit(0)
+ os.close(0)
+ os.close(1)
+ os.close(2)
- # adjust oom preference
- make_invincible()
+ # adjust oom preference
+ make_invincible()
- # configure environment of children
- env = {"PYTHONPATH":TASHI_PATH+"/src"}
- while True:
- pid = os.fork();
- if pid == 0:
- # child
- # nodemanagers are vulnerable, not the supervisor
- make_vulnerable()
- if foreground == False:
- try:
- lfd = os.open(LOG_FILE, os.O_APPEND|os.O_CREAT|os.O_WRONLY)
- except IOError:
- lfd = os.open("/dev/null", os.O_WRONLY)
- # make this fd stdout and stderr
- os.dup2(lfd, 1)
- os.dup2(lfd, 2)
- # close stdin
- os.close(0)
- os.chdir(TASHI_PATH)
- os.execle("./bin/nodemanager.py", "./bin/nodemanager.py", env)
- os._exit(-1)
- # sleep before checking child status
- time.sleep(SLEEP_INTERVAL)
- os.waitpid(pid, 0)
- return 0
+ # configure environment of children
+ env = {"PYTHONPATH":TASHI_PATH+"/src"}
+ while True:
+ pid = os.fork();
+ if pid == 0:
+ # child
+ # nodemanagers are vulnerable, not the supervisor
+ make_vulnerable()
+ if foreground == False:
+ try:
+ lfd = os.open(LOG_FILE, os.O_APPEND|os.O_CREAT|os.O_WRONLY)
+ except IOError:
+ lfd = os.open("/dev/null", os.O_WRONLY)
+ # make this fd stdout and stderr
+ os.dup2(lfd, 1)
+ os.dup2(lfd, 2)
+ # close stdin
+ os.close(0)
+ os.chdir(TASHI_PATH)
+ os.execle("./bin/nodemanager.py", "./bin/nodemanager.py", env)
+ os._exit(-1)
+ # sleep before checking child status
+ time.sleep(SLEEP_INTERVAL)
+ os.waitpid(pid, 0)
+ return 0
if __name__ == "__main__":
- sys.exit(main())
+ sys.exit(main())
diff --git a/src/zoni/agents/dhcpdns.py b/src/zoni/agents/dhcpdns.py
index ece9e29..48cc2b7 100644
--- a/src/zoni/agents/dhcpdns.py
+++ b/src/zoni/agents/dhcpdns.py
@@ -27,6 +27,7 @@
#from instancehook import InstanceHook
#from tashi.services.ttypes import Instance, NetworkConfiguration
#from tashi import boolean
+from tashi.rpycservices.rpyctypes import Instance
class DhcpDns():
diff --git a/src/zoni/bootstrap/bootstrapinterface.py b/src/zoni/bootstrap/bootstrapinterface.py
index 54b2ef8..35de879 100644
--- a/src/zoni/bootstrap/bootstrapinterface.py
+++ b/src/zoni/bootstrap/bootstrapinterface.py
@@ -18,10 +18,6 @@
# $Id$
#
-import sys
-import os
-import optparse
-
class BootStrapInterface(object):
""" Interface description for booting
diff --git a/src/zoni/bootstrap/pxe.py b/src/zoni/bootstrap/pxe.py
index 34c13f4..37d8c25 100644
--- a/src/zoni/bootstrap/pxe.py
+++ b/src/zoni/bootstrap/pxe.py
@@ -22,7 +22,6 @@
import sys
import string
import datetime
-import subprocess
import MySQLdb
import traceback
import logging
diff --git a/src/zoni/client/zoni-cli.py b/src/zoni/client/zoni-cli.py
index 060ec16..9ddf52f 100755
--- a/src/zoni/client/zoni-cli.py
+++ b/src/zoni/client/zoni-cli.py
@@ -20,43 +20,36 @@
#
# $Id$
#
-import os
-import sys
+
import optparse
import socket
import logging.config
import getpass
+import os
+import sys
import re
+import string
import subprocess
-
-
#from zoni import *
#from zoni.data.resourcequerysql import ResourceQuerySql
-import zoni
-from zoni.data.resourcequerysql import *
+#import zoni
+#from zoni.data.resourcequerysql import *
-from zoni.data.usermanagementinterface import UserManagementInterface
-from zoni.data.usermanagementinterface import UserManagementInterface
-
-from zoni.bootstrap.bootstrapinterface import BootStrapInterface
from zoni.bootstrap.pxe import Pxe
-from zoni.hardware.systemmanagementinterface import SystemManagementInterface
from zoni.hardware.ipmi import Ipmi
-from zoni.hardware.hwswitchinterface import HwSwitchInterface
from zoni.hardware.dellswitch import HwDellSwitch
from zoni.hardware.raritanpdu import raritanDominionPx
from zoni.hardware.delldrac import dellDrac
+import zoni.hardware.systemmanagement
+from zoni.data import usermanagement
from zoni.agents.dhcpdns import DhcpDns
-from zoni.hardware.systemmanagement import SystemManagement
+from zoni.extra.util import validIp, validMac
+from zoni.version import version, revision
-
-from zoni.extra.util import *
-from zoni.version import *
-
-from tashi.util import instantiateImplementation, signalHandler
+from tashi.util import instantiateImplementation, getConfig
#import zoni.data.usermanagement
#from usermanagement import UserManagement
@@ -74,7 +67,7 @@
(configs, configFiles) = getConfig()
logging.config.fileConfig(configFiles)
- log = logging.getLogger(os.path.basename(__file__))
+ #log = logging.getLogger(os.path.basename(__file__))
#logit(configs['logFile'], "Starting Zoni client")
#logit(configs['logFile'], "Loading config file")
@@ -359,11 +352,11 @@
if (options.nodeName):
cmdargs["sys_id"] = options.nodeName
- if (options.numCores or options.clockSpeed or options.numMemory or options.numProcs or options.cpuFlags) and not options.showResources:
- usage = "MISSING OPTION: When specifying hardware parameters, you need the -s or --showResources switch"
- print usage
- parser.print_help()
- exit()
+ if (options.numCores or options.clockSpeed or options.numMemory or options.numProcs or options.cpuFlags) and not options.showResources:
+ usage = "MISSING OPTION: When specifying hardware parameters, you need the -s or --showResources switch"
+ print usage
+ parser.print_help()
+ exit()
if options.getResources:
print "ALL resources"
@@ -464,7 +457,7 @@
userId = usermgt.getUserId(options.userName)
if userId:
- reservationId = reservation.createReservation(userId, options.reservationDuration, options.myNotes + " " + str(string.join(args[0:len(args)])))
+ __reservationId = reservation.createReservation(userId, options.reservationDuration, options.myNotes + " " + str(string.join(args[0:len(args)])))
else:
print "user doesn't exist"
@@ -771,7 +764,7 @@
try:
socket.gethostbyname(hostName)
sys.stdout.write("[Success]\n")
- except Exception, e:
+ except Exception:
sys.stdout.write("[Fail]\n")
else:
mesg = "ERROR: Malformed IP Address\n"
@@ -794,7 +787,7 @@
try:
socket.gethostbyname(hostName)
sys.stdout.write("[Fail]\n")
- except Exception, e:
+ except Exception:
sys.stdout.write("[Success]\n")
if options.removeDhcp:
dhcpdns.removeDhcp(hostName)
diff --git a/src/zoni/data/infostore.py b/src/zoni/data/infostore.py
index bd389c2..7651b43 100644
--- a/src/zoni/data/infostore.py
+++ b/src/zoni/data/infostore.py
@@ -18,10 +18,6 @@
# $Id$
#
-import sys
-import os
-import optparse
-
class InfoStore (object):
""" Interface description for query system resources
diff --git a/src/zoni/data/reservation.py b/src/zoni/data/reservation.py
index 3c53ec1..8a43fee 100644
--- a/src/zoni/data/reservation.py
+++ b/src/zoni/data/reservation.py
@@ -18,7 +18,6 @@
# $Id:$
#
-import sys
import os
import string
import logging
@@ -88,9 +87,9 @@
def delReservation (self, userId):
raise NotImplementedError
- def defineReservation():
+ def defineReservation(self):
raise NotImplementedError
- def showReservation():
+ def showReservation(self):
raise NotImplementedError
diff --git a/src/zoni/data/reservationmanagementinterface.py b/src/zoni/data/reservationmanagementinterface.py
index 1f90bc7..620d15e 100644
--- a/src/zoni/data/reservationmanagementinterface.py
+++ b/src/zoni/data/reservationmanagementinterface.py
@@ -18,9 +18,6 @@
# $Id:$
#
-import sys
-import os
-
class ReservationManagementInterface(object):
""" Interface description for reservation management
@@ -50,9 +47,9 @@
def delReservation (self, userId):
raise NotImplementedError
- def defineReservation():
+ def defineReservation(self):
raise NotImplementedError
- def showReservation():
+ def showReservation(self):
raise NotImplementedError
diff --git a/src/zoni/data/resourcequerysql.py b/src/zoni/data/resourcequerysql.py
index 2beafd4..4ea3ed3 100644
--- a/src/zoni/data/resourcequerysql.py
+++ b/src/zoni/data/resourcequerysql.py
@@ -22,16 +22,13 @@
import sys
import string
import MySQLdb
-import subprocess
import traceback
import logging
-import threading
import time
import usermanagement
from zoni.data.infostore import InfoStore
-from zoni.extra.util import checkSuper, createKey
-from zoni.agents.dhcpdns import DhcpDns
+from zoni.extra.util import createKey
class ResourceQuerySql(InfoStore):
def __init__(self, config, verbose=None):
@@ -80,7 +77,7 @@
vlans = []
for val in vlanInfo.split(","):
try:
- ret = self.getVlanId(val.split(":")[0])
+ __ret = self.getVlanId(val.split(":")[0])
vlans.append(val)
except Exception, e:
print e
@@ -96,7 +93,7 @@
domainKey = createKey(name)
query = "insert into domaininfo (domain_name, domain_desc, domain_key) values ('%s','%s', '%s')" % (name, desc, domainKey)
try:
- result = self.insertDb(query)
+ __result = self.insertDb(query)
mesg = "Adding domain %s(%s)" % (name, desc)
self.log.info(mesg)
except Exception, e:
@@ -112,7 +109,7 @@
vlanType = i.split(":")[1]
query = "insert into domainmembermap values (%d, %d, '%s')" % (domainId, vlanId, vlanType)
try:
- result = self.insertDb(query)
+ __result = self.insertDb(query)
except Exception, e:
print e
@@ -133,16 +130,16 @@
mesg = "Removing domain %s" % (name)
self.log.info(mesg)
query = "delete from domaininfo where domain_name = '%s'" % (name)
- result = self.__deleteDb(query)
+ __result = self.__deleteDb(query)
# Need to remove any vlans attached to this domain
query = "delete from domainmembermap where domain_id = '%s'" % (domainId)
- result = self.__deleteDb(query)
+ __result = self.__deleteDb(query)
def showDomains(self):
usermgt = eval("usermanagement.%s" % (self.config['userManagement']) + "()")
query = "select r.reservation_id, r.user_id, d.domain_name, d.domain_desc from domaininfo d, allocationinfo a, reservationinfo r where d.domain_id = a.domain_id and a.reservation_id = r.reservation_id"
result = self.selectDb(query)
- desc = result.description
+ #desc = result.description
if result.rowcount > 0:
print "%s\t%s\t%s\t%s" % (result.description[0][0], result.description[1][0], result.description[2][0], result.description[3][0])
print "------------------------------------------------------------"
@@ -173,7 +170,7 @@
return -1
query = "insert into vlaninfo (vlan_num, vlan_desc) values ('%s','%s')" % (vnumber, desc)
try:
- result = self.insertDb(query)
+ __result = self.insertDb(query)
mesg = "Adding vlan %s(%s)" % (vnumber, desc)
self.log.info(mesg)
except Exception, e:
@@ -290,8 +287,6 @@
def showResources(self, cmdargs):
-
- queryopt = ""
defaultFields = "mac_addr, location, num_procs, num_cores, clock_speed, mem_total "
#defaultFields = "*"
@@ -315,20 +310,21 @@
# header
print line
- sum = {}
+ _sum = {}
for row in result.fetchall():
line = ""
- sum['totProc'] = sum.get('totProc', 0)
- sum['totProc'] += int(row[2])
- sum['totCores'] = sum.get('totCores', 0)
- sum['totCores'] += int(row[3])
- sum['totMemory'] = sum.get('totMemory', 0)
- sum['totMemory'] += int(row[5])
+ _sum['totProc'] = _sum.get('totProc', 0)
+ _sum['totProc'] += int(row[2])
+ _sum['totCores'] = _sum.get('totCores', 0)
+ _sum['totCores'] += int(row[3])
+ _sum['totMemory'] = _sum.get('totMemory', 0)
+ _sum['totMemory'] += int(row[5])
for val in row:
line += str(val).center(20)
print line
- print "\n%s systems registered - %d procs | %d cores | %d bytes RAM" % (str(result.rowcount), sum['totProc'], sum['totCores'], sum['totMemory'],)
+ print "\n%s systems registered - %d procs | %d cores | %d bytes RAM" % \
+ (str(result.rowcount), _sum['totProc'], _sum['totCores'], _sum['totMemory'],)
def getAvailableResources(self):
# Maybe should add a status flag?
@@ -513,7 +509,7 @@
result = self.selectDb(query)
print "NODE ALLOCATION\n"
- sum = {}
+ _sum = {}
if self.verbose:
print "%-5s%-10s%-10s%-10s%-13s%-12s%-10s%-34s%-20s%s" % ("Res", "User", "Host", "Domain", "Cores/Mem","Expiration", "Hostname", "Boot Image Name", "Vlan Member", "Notes")
else:
@@ -538,10 +534,10 @@
allocation_id = i[11]
userName = usermgt.getUserName(uid)
combined_notes = str(rnotes) + "|" + str(anotes)
- sum['totCores'] = sum.get('totCores', 0)
- sum['totCores'] += cores
- sum['totMemory'] = sum.get('totMemory', 0)
- sum['totMemory'] += memory
+ _sum['totCores'] = _sum.get('totCores', 0)
+ _sum['totCores'] += cores
+ _sum['totMemory'] = _sum.get('totMemory', 0)
+ _sum['totMemory'] += memory
if self.verbose:
query = "select v.vlan_num, m.vlan_type from vlaninfo v, vlanmembermap m where v.vlan_id = m.vlan_id and allocation_id = '%d' order by vlan_num asc" % allocation_id
vlanRes = self.selectDb(query)
@@ -555,7 +551,7 @@
print "%-5s%-10s%-10s%-10s%-2s/%-10s%-12s%-10s%-34s%-20s%s" % (resId, userName, host, domain, cores, memory,expire, hostname, image_name, vlanMember,combined_notes)
else:
print "%-10s%-10s%-10s%-2s/%-10s%-12s%s" % (userName, host, domain, cores, memory,expire, combined_notes)
- print "\n%s systems allocated - %d cores| %d bytes RAM" % (str(result.rowcount), sum['totCores'], sum['totMemory'])
+ print "\n%s systems allocated - %d cores| %d bytes RAM" % (str(result.rowcount), _sum['totCores'], _sum['totMemory'])
def showReservation(self, userId=None):
#from IPython.Shell import IPShellEmbed
@@ -617,7 +613,7 @@
query = "select image_name from imageinfo"
result = self.selectDb(query)
row = result.fetchall()
- desc = result.description
+ #desc = result.description
imagelist = []
for i in row:
@@ -630,7 +626,7 @@
query = "select image_name, dist, dist_ver from imageinfo"
result = self.selectDb(query)
row = result.fetchall()
- desc = result.description
+ #desc = result.description
for i in row:
print i
@@ -805,7 +801,7 @@
cursor.execute (query)
self.conn.commit()
row = cursor.fetchall()
- desc = cursor.description
+ #desc = cursor.description
except MySQLdb.OperationalError, e:
msg = "%s : %s" % (e[1], query)
self.log.error(msg)
@@ -886,12 +882,12 @@
return cursor
- def updateReservation (self, reservationId, userId=None, reservationDuration=None, vlanIsolate=None, allocationNotes=None):
+ def updateReservation (self, reservationId, userId=None, resDuration=None, vlanIsolate=None, allocationNotes=None):
mesg = "Updating reservation %s" % (str(reservationId))
self.log.info(mesg)
- if reservationDuration:
+ if resDuration:
if len(resDuration) == 8:
expireDate = resDuration
elif len(resDuration) < 4:
@@ -906,7 +902,7 @@
mesg = "Updating reservationDuration :" + resDuration
self.log.info(mesg)
- query = "update reservationinfo set reservation_exiration = \"" + expireDate_ + "\" where reservation_id = \"" + str(reservationId) + "\""
+ query = "update reservationinfo set reservation_expiration = \"" + expireDate + "\" where reservation_id = \"" + str(reservationId) + "\""
self.__updateDb(query)
if allocationNotes:
@@ -1062,7 +1058,7 @@
vId = self.getVlanId(v)
query = "delete from vlanmembermap where allocation_id = '%s' and vlan_id = '%s'" % (allocationId, vId)
- result = self.insertDb(query)
+ __result = self.insertDb(query)
mesg = "Removing vlan %s from node %s" % (v, nodeName)
self.log.info(mesg)
@@ -1153,7 +1149,7 @@
# imagemap db should be sys_id instead of mac_addr
# change later
- cur_image = host['pxe_image_name']
+ #cur_image = host['pxe_image_name']
# Get the id of the new image
query = "select image_id from imageinfo where image_name = " + "\"" + image + "\""
row = self.__queryDb(query)
@@ -1234,7 +1230,7 @@
return cap
# print out data in a consistent format
- def __showIt(data):
+ def __showIt(self, data):
pass
diff --git a/src/zoni/data/usermanagement.py b/src/zoni/data/usermanagement.py
index 4ccbd17..fad0c4e 100644
--- a/src/zoni/data/usermanagement.py
+++ b/src/zoni/data/usermanagement.py
@@ -18,7 +18,6 @@
# $Id$
#
-import sys
import os
from usermanagementinterface import UserManagementInterface
diff --git a/src/zoni/data/usermanagementinterface.py b/src/zoni/data/usermanagementinterface.py
index ec34331..c7386f1 100644
--- a/src/zoni/data/usermanagementinterface.py
+++ b/src/zoni/data/usermanagementinterface.py
@@ -18,9 +18,6 @@
# $Id$
#
-import sys
-import os
-
class UserManagementInterface(object):
""" Interface description for user management
diff --git a/src/zoni/extra/util.py b/src/zoni/extra/util.py
index 54fc6d0..0381823 100644
--- a/src/zoni/extra/util.py
+++ b/src/zoni/extra/util.py
@@ -19,6 +19,7 @@
#
import os
+import sys
import string
import ConfigParser
import time
@@ -26,7 +27,6 @@
import re
import threading
import subprocess
-import logging
def loadConfigFile(parser):
#parser = ConfigParser.ConfigParser()
@@ -222,32 +222,43 @@
f.close()
return val
-
-
+def __getShellFn():
+ try:
+ from IPython.Shell import IPShellEmbed
+ return (1, IPShellEmbed)
+ except ImportError:
+ import IPython
+ return (2, IPython.embed)
def debugConsole(globalDict):
"""A debugging console that optionally uses pysh"""
def realDebugConsole(globalDict):
try :
import atexit
- from IPython.Shell import IPShellEmbed
+ (calltype, shellfn) = __getShellFn()
def resetConsole():
# XXXpipe: make input window sane
- (stdin, stdout) = os.popen2("reset")
+ (__stdin, stdout) = os.popen2("reset")
stdout.read()
- dbgshell = IPShellEmbed()
atexit.register(resetConsole)
- dbgshell(local_ns=globalDict, global_ns=globalDict)
- except Exception:
+ if calltype == 1:
+ dbgshell=shellfn(user_ns=globalDict)
+ dbgshell()
+ elif calltype == 2:
+ dbgshell=shellfn
+ dbgshell(user_ns=globalDict)
+ except Exception, e:
CONSOLE_TEXT=">>> "
- input = " "
- while (input != ""):
+ inputline = " "
+ while (inputline != ""):
sys.stdout.write(CONSOLE_TEXT)
- input = sys.stdin.readline()
+ inputline = sys.stdin.readline()
try:
- exec(input) in globalDict
+ exec(inputline) in globalDict
except Exception, e:
sys.stdout.write(str(e) + "\n")
+
+ os._exit(0)
+
if (os.getenv("DEBUG", "0") == "1"):
threading.Thread(target=lambda: realDebugConsole(globalDict)).start()
-
diff --git a/src/zoni/hardware/apcswitchedrackpdu.py b/src/zoni/hardware/apcswitchedrackpdu.py
index 7c8ba9d..cf06725 100644
--- a/src/zoni/hardware/apcswitchedrackpdu.py
+++ b/src/zoni/hardware/apcswitchedrackpdu.py
@@ -18,8 +18,6 @@
# $Id$
#
-import sys
-import os
import warnings
warnings.filterwarnings("ignore")
@@ -46,7 +44,7 @@
def getPowerStatus(self):
thisoid = eval(str(self.oid_status) + "," + str(self.port))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', self.user, 0), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), thisoid)
output = varBinds[0][1]
@@ -81,7 +79,7 @@
def powerOn(self):
thisoid = eval(str(self.oid_status) + "," + str(self.port))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+ __errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
cmdgen.CommunityData('my-agent', self.user, 1), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
(thisoid, rfc1902.Integer('1')))
@@ -89,7 +87,7 @@
def powerOff(self):
thisoid = eval(str(self.oid_status) + "," + str(self.port))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+ __errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
cmdgen.CommunityData('my-agent', self.user, 1), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
(thisoid, rfc1902.Integer('2')))
diff --git a/src/zoni/hardware/delldrac.py b/src/zoni/hardware/delldrac.py
index 7cb189f..7560666 100644
--- a/src/zoni/hardware/delldrac.py
+++ b/src/zoni/hardware/delldrac.py
@@ -19,14 +19,13 @@
#
import sys
-import os
import pexpect
import time
import logging
import tempfile
from systemmanagementinterface import SystemManagementInterface
-from zoni.extra.util import timeF, log
+from zoni.extra.util import timeF
class dellDrac(SystemManagementInterface):
@@ -116,19 +115,19 @@
child = self.__login()
child.logfile = fout
- cmd = "racadm serveraction -m " + self.server + " powerup"
+ cmd = "racadm serveraction -m %s powerup" % (self.server)
child.sendline(cmd)
- i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+ __i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
fout.seek(0)
- self.log.info("Hardware power on : %s", self.hostname)
+ self.log.info("Hardware power on : %s" % self.hostname)
for val in fout.readlines():
if "OK" in val:
code = 1
if "ALREADY POWER-ON" in val:
code = 1
- self.log.info("Hardware already powered on : %s", self.hostname)
+ self.log.info("Hardware already powered on : %s" % self.hostname)
if code < 1:
- self.log.info("Hardware power on failed : %s", self.hostname)
+ self.log.info("Hardware power on failed : %s" % self.hostname)
fout.close()
child.terminate()
return code
@@ -139,15 +138,15 @@
fout = tempfile.TemporaryFile()
child = self.__login()
child.logfile = fout
- cmd = "racadm serveraction -m " + self.server + " powerdown"
+ cmd = "racadm serveraction -m %s powerdown" % (self.server)
child.sendline(cmd)
- i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+ __i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
fout.seek(0)
- self.log.info("Hardware power off : %s", self.hostname)
+ self.log.info("Hardware power off : %s" % self.hostname)
for val in fout.readlines():
if "OK" in val:
code = 1
- if "CURRENTLY POWER-OFF" in val:
+ if "CURRENTLY POWER-OFF" in val:
self.log.info("Hardware already power off : %s", self.hostname)
code = 1
if code < 1:
@@ -164,14 +163,14 @@
child.logfile = fout
cmd = "racadm serveraction -m " + self.server + " graceshutdown"
child.sendline(cmd)
- i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+ __i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
fout.seek(0)
self.log.info("Hardware power off (soft): %s", self.hostname)
for val in fout.readlines():
if "OK" in val:
code = 1
- if "CURRENTLY POWER-OFF" in val:
+ if "CURRENTLY POWER-OFF" in val:
self.log.info("Hardware already power off : %s", self.hostname)
code = 1
if code < 1:
@@ -188,7 +187,7 @@
child.logfile = fout
cmd = "racadm serveraction -m " + self.server + " powercycle"
child.sendline(cmd)
- i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+ __i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
fout.seek(0)
self.log.info("Hardware power cycle : %s", self.hostname)
for val in fout.readlines():
@@ -208,7 +207,7 @@
child.logfile = fout
cmd = "racadm serveraction -m " + self.server + " hardreset"
child.sendline(cmd)
- i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+ __i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
fout.seek(0)
for val in fout.readlines():
if "OK" in val:
@@ -225,5 +224,5 @@
child = self.__login()
cmd = "connect -F " + self.server
child.sendline(cmd)
- i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
+ __i=child.expect(['DRAC/MC:', pexpect.EOF, pexpect.TIMEOUT])
child.terminate()
diff --git a/src/zoni/hardware/dellswitch.py b/src/zoni/hardware/dellswitch.py
index 0ddf8aa..63f8b30 100644
--- a/src/zoni/hardware/dellswitch.py
+++ b/src/zoni/hardware/dellswitch.py
@@ -18,24 +18,22 @@
# $Id$
#
-import os
-import sys
+
import pexpect
import datetime
-import time
import thread
-import string
-import getpass
import socket
import tempfile
+import os
import logging
+import sys
+import time
+import string
-#import zoni
-from zoni.data.resourcequerysql import *
from zoni.hardware.hwswitchinterface import HwSwitchInterface
from zoni.data.resourcequerysql import ResourceQuerySql
from zoni.agents.dhcpdns import DhcpDns
-from zoni.extra.util import *
+from zoni.extra.util import normalizeMac
''' Using pexpect to control switches because couldn't get snmp to work
@@ -54,7 +52,7 @@
pass
- def setVerbose(self, verbose):
+ def setVerbose(self, verbose):
self.verbose = verbose
def __login(self):
@@ -261,10 +259,10 @@
i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
- except EOF:
+ except pexpect.EOF:
print "EOF", i
#child.sendline()
- except TIMEOUT:
+ except pexpect.TIMEOUT:
print "TIMEOUT", i
#child.interact(escape_character='\x1d', input_filter=None, output_filter=None)
@@ -463,16 +461,16 @@
child = self.__login()
cmd = "copy running-config startup-config"
child.sendline(cmd)
- i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
+ __i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
child.sendline("y")
child.terminate()
- def __saveConfig(self):
- cmd = "copy running-config startup-config"
- child.sendline(cmd)
- i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
- child.sendline("y")
- child.terminate()
+# def __saveConfig(self):
+# cmd = "copy running-config startup-config"
+# child.sendline(cmd)
+# __i = child.expect(['y/n', pexpect.EOF, pexpect.TIMEOUT])
+# child.sendline("y")
+# child.terminate()
def registerToZoni(self, user, password, host):
@@ -511,7 +509,7 @@
child.sendline(cmd)
val = host + "#"
tval = host + ">"
- i = child.expect([val, tval, '\n\r\n\r', "--More--", pexpect.EOF, pexpect.TIMEOUT])
+ __i = child.expect([val, tval, '\n\r\n\r', "--More--", pexpect.EOF, pexpect.TIMEOUT])
cmd = "show version"
child.sendline(cmd)
i = child.expect([val, tval, '\n\r\n\r', pexpect.EOF, pexpect.TIMEOUT])
@@ -547,19 +545,19 @@
user = "public"
oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,1,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_model'] = str(varBinds[0][1])
oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,3,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_make'] = str(varBinds[0][1])
oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,4,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_version_sw'] = str(varBinds[0][1])
diff --git a/src/zoni/hardware/f10s50switch.py b/src/zoni/hardware/f10s50switch.py
index 81c08ff..2641274 100644
--- a/src/zoni/hardware/f10s50switch.py
+++ b/src/zoni/hardware/f10s50switch.py
@@ -26,15 +26,13 @@
import time
import thread
import string
-import getpass
import socket
import tempfile
import logging
#import zoni
-from zoni.data.resourcequerysql import *
-from zoni.hardware.hwswitchinterface import HwSwitchInterface
from zoni.data.resourcequerysql import ResourceQuerySql
+from zoni.hardware.hwswitchinterface import HwSwitchInterface
from zoni.agents.dhcpdns import DhcpDns
@@ -49,7 +47,7 @@
self.log = logging.getLogger(os.path.basename(__file__))
- def setVerbose(self, verbose):
+ def setVerbose(self, verbose):
self.verbose = verbose
def __login(self):
@@ -138,7 +136,7 @@
child.expect(["conf-if", pexpect.EOF])
child.sendline("switchport")
child.sendline("exit")
- child.sendline("interface vlan " + vlan")
+ child.sendline("interface vlan %s" % vlan)
child.expect(["conf-if", pexpect.EOF])
cmd = "tagged port-channel 1"
child.sendline(cmd)
@@ -214,10 +212,10 @@
i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
i=child.expect(['console','#', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
- except EOF:
+ except pexpect.EOF:
print "EOF", i
#child.sendline()
- except TIMEOUT:
+ except pexpect.TIMEOUT:
print "TIMEOUT", i
#child.interact(escape_character='\x1d', input_filter=None, output_filter=None)
@@ -237,7 +235,7 @@
child = self.__login()
child.logfile = sys.stdout
child.sendline('config')
- cmd = "interface vlan " + vlan)
+ cmd = "interface vlan %s" % (vlan)
child.sendline(cmd)
i=child.expect(['conf-if', pexpect.EOF, pexpect.TIMEOUT])
if i > 0:
@@ -270,7 +268,7 @@
child.logfile = sys.stdout
cmd = "show interfaces g 0/" + str(self.host['hw_port'])
child.sendline(cmd)
- i = child.expect(['#', pexpect.EOF, pexpect.TIMEOUT])
+ __i = child.expect(['#', pexpect.EOF, pexpect.TIMEOUT])
child.terminate()
def interactiveSwitchConfig(self):
@@ -374,12 +372,12 @@
user = "public"
oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,1,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_model'] = str(varBinds[0][1])
oid = eval("1,3,6,1,4,1,674,10895,3000,1,2,100,3,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_make'] = str(varBinds[0][1])
diff --git a/src/zoni/hardware/hpilo.py b/src/zoni/hardware/hpilo.py
index 9523b4b..b02a1ec 100644
--- a/src/zoni/hardware/hpilo.py
+++ b/src/zoni/hardware/hpilo.py
@@ -19,7 +19,6 @@
#
import sys
-import os
import pexpect
import time
@@ -29,6 +28,26 @@
#XXX Need to add more error checking!
#XXX Need to consider difference in responses between a rackmount server and a blade server - MIMOS
+def log(f):
+ def myF(*args, **kw):
+ print "calling %s%s" % (f.__name__, str(args))
+ res = f(*args, **kw)
+ print "returning from %s -> %s" % (f.__name__, str(res))
+ return res
+ myF.__name__ = f.__name__
+ return myF
+
+def timeF(f):
+ def myF(*args, **kw):
+ start = time.time()
+ res = f(*args, **kw)
+ end = time.time()
+ print "%s took %f" % (f.__name__, end-start)
+ return res
+ myF.__name__ = f.__name__
+ return myF
+
+
class hpILo(SystemManagementInterface):
def __init__(self, config, nodeName, hostInfo):
self.config = config
diff --git a/src/zoni/hardware/hpswitch.py b/src/zoni/hardware/hpswitch.py
index ada83b9..cfb7195 100644
--- a/src/zoni/hardware/hpswitch.py
+++ b/src/zoni/hardware/hpswitch.py
@@ -25,13 +25,11 @@
import pexpect
import datetime
import thread
-import time
import threading
import logging
from hwswitchinterface import HwSwitchInterface
-from resourcequerysql import ResourceQuerySql
class HwHPSwitch(HwSwitchInterface):
@@ -74,10 +72,10 @@
child.sendline(cmd)
opt = child.expect(["Confirm(.*)", "No save(.*)", pexpect.EOF, pexpect.TIMEOUT])
if opt == 0:
- print "saving to flash"
- child.sendline("y\n")
+ print "saving to flash"
+ child.sendline("y\n")
if opt == 1:
- print "no save needed"
+ print "no save needed"
child.sendline('exit')
child.terminate()
@@ -169,10 +167,10 @@
i=child.expect(['console','sw', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
i=child.expect(['console','sw', 'Name:', pexpect.EOF, pexpect.TIMEOUT], timeout=2)
- except EOF:
+ except pexpect.EOF:
print "EOF", i
#child.sendline()
- except TIMEOUT:
+ except pexpect.TIMEOUT:
print "TIMEOUT", i
#child.interact(escape_character='\x1d', input_filter=None, output_filter=None)
@@ -245,7 +243,7 @@
cmd = "/info/port " + str(self.host['hw_port'])
child.sendline(cmd)
child.logfile = sys.stdout
- opt = child.expect(['Info(.*)', pexpect.EOF, pexpect.TIMEOUT])
+ __opt = child.expect(['Info(.*)', pexpect.EOF, pexpect.TIMEOUT])
# this needs to be removed or rewritten
def interactiveSwitchConfig(self):
diff --git a/src/zoni/hardware/hwswitchinterface.py b/src/zoni/hardware/hwswitchinterface.py
index d0ed99a..bdcdb51 100644
--- a/src/zoni/hardware/hwswitchinterface.py
+++ b/src/zoni/hardware/hwswitchinterface.py
@@ -18,8 +18,6 @@
# $Id$
#
-import sys
-import os
class HwSwitchInterface(object):
""" Interface description for hardware switches
diff --git a/src/zoni/hardware/ipmi.py b/src/zoni/hardware/ipmi.py
index 649b8a2..abc4bf0 100644
--- a/src/zoni/hardware/ipmi.py
+++ b/src/zoni/hardware/ipmi.py
@@ -18,11 +18,8 @@
# $Id$
#
-import sys
-import os
import subprocess
import logging
-import string
from systemmanagementinterface import SystemManagementInterface
diff --git a/src/zoni/hardware/raritanpdu.py b/src/zoni/hardware/raritanpdu.py
index 3d534b7..7cb3961 100644
--- a/src/zoni/hardware/raritanpdu.py
+++ b/src/zoni/hardware/raritanpdu.py
@@ -18,19 +18,19 @@
# $Id$
#
-import sys
-import os
-import string
import warnings
import logging
+import string
+import sys
import time
+
warnings.filterwarnings("ignore")
from pysnmp.entity.rfc3413.oneliner import cmdgen
from pysnmp.proto import rfc1902
-from zoni.data.resourcequerysql import *
+from zoni.data.resourcequerysql import ResourceQuerySql
from zoni.hardware.systemmanagementinterface import SystemManagementInterface
-
+from zoni.agents.dhcpdns import DhcpDns
#class systemmagement():
#def __init__(self, proto):
@@ -90,7 +90,7 @@
'''
def getOffset(self):
thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(0))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', self.user, 0), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), thisoid)
output = varBinds[0][1]
@@ -102,7 +102,7 @@
def __setPowerStatus(self):
thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(self.port))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', self.user, 0), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), thisoid)
output = varBinds[0][1]
@@ -134,7 +134,7 @@
def powerOn(self):
thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(self.port))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+ __errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
cmdgen.CommunityData('my-agent', self.user, 1), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
(thisoid, rfc1902.Integer('1')))
@@ -142,7 +142,7 @@
def powerOff(self):
thisoid = eval(str(self.oid) + str(self.oid_status) + "," + str(self.port))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().setCmd( \
+ __errorIndication, __errorStatus, __errorIndex, __varBinds = cmdgen.CommandGenerator().setCmd( \
cmdgen.CommunityData('my-agent', self.user, 1), \
cmdgen.UdpTransportTarget((self.pdu_name, 161)), \
(thisoid, rfc1902.Integer('0')))
@@ -181,7 +181,7 @@
a={}
oid = eval(str("1,3,6,1,2,1,1,1,0"))
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
@@ -193,7 +193,7 @@
a['hw_make'] = str(varBinds[0][1])
oid = eval("1,3,6,1,4,1,13742,4,1,1,6,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
x = []
@@ -204,7 +204,7 @@
a['hw_mac'] = ":".join(['%s' % d for d in x])
oid = eval("1,3,6,1,4,1,13742,4,1,1,2,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
serial = str(varBinds[0][1])
@@ -214,13 +214,13 @@
a['hw_notes'] = val + "; Serial " + serial
oid = eval("1,3,6,1,4,1,13742,4,1,1,1,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_version_fw'] = str(varBinds[0][1])
oid = eval("1,3,6,1,4,1,13742,4,1,1,12,0")
- errorIndication, errorStatus, errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
+ __errorIndication, __errorStatus, __errorIndex, varBinds = cmdgen.CommandGenerator().getCmd( \
cmdgen.CommunityData('my-agent', user, 0), \
cmdgen.UdpTransportTarget((host, 161)), oid)
a['hw_model'] = str(varBinds[0][1])
diff --git a/src/zoni/hardware/systemmanagement.py b/src/zoni/hardware/systemmanagement.py
index 3333a3f..873e1bf 100644
--- a/src/zoni/hardware/systemmanagement.py
+++ b/src/zoni/hardware/systemmanagement.py
@@ -19,13 +19,11 @@
# $Id$
#
-import sys
-import os
import logging
import threading
+import time
from systemmanagementinterface import SystemManagementInterface
-from zoni.data.resourcequerysql import *
from tashi.util import instantiateImplementation
@@ -41,7 +39,7 @@
def getInfo(self, nodeName):
- self.host = self.data.getHostInfo(node)
+ self.host = self.data.getHostInfo(nodeName)
def setVerbose(self, verbose):
@@ -65,7 +63,7 @@
# [2] = hw method password
success = 0
for i in hw:
- inst = instantiateImplementation(self.config['hardwareControl'][i[0]]['class'], self.config, nodeName, self.host)
+ __inst = instantiateImplementation(self.config['hardwareControl'][i[0]]['class'], self.config, nodeName, self.host)
a = "inst.%s" % mycmd
for count in range(retries):
doit = eval(a)
@@ -89,7 +87,7 @@
def softPowerConfirm(self, method, nodeName):
# using a sleep for now...
time.sleep(30)
- inst = instantiateImplementation(self.config['hardwareControl'][method]['class'], self.config, nodeName, self.host)
+ __inst = instantiateImplementation(self.config['hardwareControl'][method]['class'], self.config, nodeName, self.host)
mycmd = "%s()" % ("powerOff")
a = "inst.%s" % mycmd
doit = eval(a)
diff --git a/src/zoni/hardware/systemmanagementinterface.py b/src/zoni/hardware/systemmanagementinterface.py
index 6d50a09..0d7901b 100644
--- a/src/zoni/hardware/systemmanagementinterface.py
+++ b/src/zoni/hardware/systemmanagementinterface.py
@@ -18,9 +18,6 @@
# $Id$
#
-import sys
-import os
-
class SystemManagementInterface(object):
""" Interface description for hardware management controllers
diff --git a/src/zoni/install/db/zoniDbSetup.py b/src/zoni/install/db/zoniDbSetup.py
index 88998b4..5049046 100644
--- a/src/zoni/install/db/zoniDbSetup.py
+++ b/src/zoni/install/db/zoniDbSetup.py
@@ -20,15 +20,13 @@
import os
import sys
-import string
try:
import MySQLdb
- import traceback
import optparse
import getpass
except ImportError, e:
- print "Module not installed : %s" % e
- exit()
+ print "Module not installed : %s" % e
+ exit()
a = os.path.join("../")
@@ -38,8 +36,8 @@
a = os.path.join("../../..")
sys.path.append(a)
-from zoni.version import *
-from zoni.extra.util import *
+from zoni.version import version, revision
+from zoni.extra.util import getConfig
def main():
@@ -53,7 +51,7 @@
parser.add_option("-u", "--userName", "--username", dest="userName", help="Mysql username")
parser.add_option("-p", "--password", dest="password", help="Admin mysql password")
#parser.add_option("-v", "--verbose", dest="verbosity", help="Be verbose", action="store_true", default=False)
- (options, args) = parser.parse_args()
+ (options, __args) = parser.parse_args()
if not options.userName:
parser.print_help()
@@ -63,7 +61,7 @@
if not options.password:
password = getpass.getpass()
- (configs, configFiles) = getConfig()
+ (configs, __configFiles) = getConfig()
CreateZoniDb(configs, options.userName, password)
@@ -174,7 +172,7 @@
sys.stdout.write(" Creating sysdomainmembermap...")
execQuery(conn, "CREATE TABLE IF NOT EXISTS `sysdomainmembermap` (`sys_id` int(11) unsigned NOT NULL, `domain_id` int(11) NOT NULL)")
sys.stdout.write("Success\n")
- # Create allocationinfo
+ # Create allocationinfo
sys.stdout.write(" Creating allocationinfo...")
execQuery(conn, "CREATE TABLE IF NOT EXISTS `allocationinfo` ( `allocation_id` int(11) unsigned NOT NULL auto_increment, `sys_id` int(11) unsigned NOT NULL, `reservation_id` int(11) unsigned NOT NULL, `pool_id` int(11) unsigned NULL, `hostname` varchar(64) default NULL, `domain_id` int(11) unsigned NOT NULL, `notes` tinytext, `expire_time` timestamp default 0 NOT NULL, PRIMARY KEY (`allocation_id`)) ENGINE=INNODB")
sys.stdout.write("Success\n")
@@ -224,10 +222,10 @@
if checkVal:
sys.stdout.write(" Kernel already exists in DB...\n")
# Get the kernel_id
- kernelId = str(checkVal[1][0][0])
+ #kernelId = str(checkVal[1][0][0])
else:
- r = execQuery(conn, "INSERT into `kernelinfo` (kernel_name, kernel_release, kernel_arch) values ('linux-2.6.24-19-server', '2.6.24-19-server', 'x86_64' )")
- kernelId = str(r.lastrowid)
+ __r = execQuery(conn, "INSERT into `kernelinfo` (kernel_name, kernel_release, kernel_arch) values ('linux-2.6.24-19-server', '2.6.24-19-server', 'x86_64' )")
+ #kernelId = str(r.lastrowid)
sys.stdout.write(" Success\n")
# Initrd
@@ -325,7 +323,7 @@
if checkVal:
sys.stdout.write("Default Domain (ZoniHome) already linked to vlan " + config['zoniHomeDomain'] + "...\n")
# Get the domainId
- valId = str(checkVal[1][0][0])
+ #valId = str(checkVal[1][0][0])
else:
r = execQuery(conn, "INSERT into `domainmembermap` (domain_id, vlan_id) values (" + domainId + ", " + vlanId + ")")
domainId = str(r.lastrowid)
@@ -358,7 +356,7 @@
if checkVal:
sys.stdout.write("Default pool (ZoniHome) already exists...\n")
# Get the domainId
- poolId = str(checkVal[1][0][0])
+ #poolId = str(checkVal[1][0][0])
else:
r = execQuery(conn, "INSERT into `poolmap` (pool_id, vlan_id) values (" + zoniPoolId + ", " + vlanId + ")")
domainId = str(r.lastrowid)
@@ -370,7 +368,7 @@
sys.stdout.write("Default pool (ZoniHome) already exists...\n")
# XXX probably should delete first then add, do it later
# Get the domainId
- poolId = str(checkVal[1][0][0])
+ #poolId = str(checkVal[1][0][0])
else:
r = execQuery(conn, "INSERT into `poolmap` (pool_id, vlan_id) values (" + zoniIpmiId + ", " + vlanId + ")")
domainId = str(r.lastrowid)
@@ -406,7 +404,7 @@
def entryExists(conn, table, col, checkVal):
query = "select * from " + table + " where " + col + " = '" + checkVal + "'"
- r = execQuery(conn, query)
+ r = execQuery(conn, query)
res = r.fetchall()
if len(res) > 0:
return (1, res)
@@ -415,5 +413,5 @@
if __name__ == "__main__":
- main()
+ main()
diff --git a/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py b/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py
index a68eb83..f4ba456 100755
--- a/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py
+++ b/src/zoni/install/dnsdhcp/zoniDnsDhcpSetup.py
@@ -21,10 +21,7 @@
import os
import sys
-import string
-import traceback
import optparse
-import getpass
a = os.path.join("../")
sys.path.append(a)
@@ -33,8 +30,8 @@
a = os.path.join("../../..")
sys.path.append(a)
-from zoni.version import *
-from zoni.extra.util import *
+from zoni.version import version, revision
+from zoni.extra.util import createKey
def main():
@@ -47,13 +44,13 @@
parser = optparse.OptionParser(usage="%prog -k keyname", version="%prog " + ver + " " + rev)
parser.add_option("-k", "--keyName", "--keyname", dest="keyName", help="Key name")
#parser.add_option("-v", "--verbose", dest="verbosity", help="Be verbose", action="store_true", default=False)
- (options, args) = parser.parse_args()
+ (options, __args) = parser.parse_args()
if not options.keyName:
parser.print_help()
exit(1)
- (configs, configFiles) = getConfig()
+ #(configs, configFiles) = getConfig()
key = createKey(options.keyName)
@@ -79,5 +76,5 @@
if __name__ == "__main__":
- main()
+ main()
diff --git a/src/zoni/install/pxe/zoniPxeSetup.py b/src/zoni/install/pxe/zoniPxeSetup.py
index dd46984..a9e06c8 100644
--- a/src/zoni/install/pxe/zoniPxeSetup.py
+++ b/src/zoni/install/pxe/zoniPxeSetup.py
@@ -21,9 +21,6 @@
import os
import sys
-import string
-import traceback
-import optparse
import shutil
import urllib
import tarfile
@@ -38,21 +35,21 @@
a = os.path.join("../../..")
sys.path.append(a)
-from zoni.extra.util import *
-from zoni.version import *
+from zoni.extra.util import getConfig, checkSuper, createDir
+#from zoni.version import version, revision
from zoni.bootstrap.pxe import Pxe
def main():
''' This file sets up PXE for Zoni '''
- ver = version.split(" ")[0]
- rev = revision
+ #ver = version.split(" ")[0]
+ #rev = revision
- parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
- (options, args) = parser.parse_args()
+ #parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
+ #(options, args) = parser.parse_args()
- (configs, configFile) = getConfig()
+ (configs, __configFile) = getConfig()
ZoniPxeSetup(configs)
ZoniGetSyslinux(configs)
@@ -62,11 +59,11 @@
tftpRootDir = config['tftpRootDir']
tftpImageDir = config['tftpImageDir']
tftpBootOptionsDir = config['tftpBootOptionsDir']
- tftpUpdateFile = config['tftpUpdateFile']
+ #tftpUpdateFile = config['tftpUpdateFile']
tftpBaseFile = config['tftpBaseFile']
tftpBaseMenuFile = config['tftpBaseMenuFile']
installBaseDir = config['installBaseDir']
- registrationBaseDir = config['registrationBaseDir']
+ #registrationBaseDir = config['registrationBaseDir']
# Create the directory structure
diff --git a/src/zoni/install/www/zoniWebSetup.py b/src/zoni/install/www/zoniWebSetup.py
index 3b2fa1a..1eebf98 100644
--- a/src/zoni/install/www/zoniWebSetup.py
+++ b/src/zoni/install/www/zoniWebSetup.py
@@ -22,12 +22,8 @@
import os
import sys
import time
-import string
-import traceback
-import optparse
+#import optparse
import shutil
-import urllib
-import tarfile
a = os.path.join("../")
sys.path.append(a)
@@ -36,21 +32,20 @@
a = os.path.join("../../..")
sys.path.append(a)
-from zoni.extra.util import *
-from zoni.version import *
-from zoni.bootstrap.pxe import Pxe
+from zoni.extra.util import getConfig, checkSuper, createDir
+#from zoni.version import version, revision
def main():
''' This file sets up the web files for Zoni '''
- ver = version.split(" ")[0]
- rev = revision
+ #ver = version.split(" ")[0]
+ #rev = revision
- parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
- (options, args) = parser.parse_args()
+ #parser = optparse.OptionParser(usage="%prog ", version="%prog " + ver + " " + rev)
+ #(options, args) = parser.parse_args()
- (configs, configFiles) = getConfig()
+ (configs, __configFiles) = getConfig()
ZoniWebSetup(configs)
ZoniCreateWebConfigFile(configs)
diff --git a/src/zoni/services/pcvciservice.py b/src/zoni/services/pcvciservice.py
index 73ed819..9e9083f 100755
--- a/src/zoni/services/pcvciservice.py
+++ b/src/zoni/services/pcvciservice.py
@@ -19,7 +19,6 @@
# $Id$
#
-import threading
import logging
from tashi.util import instantiateImplementation
@@ -49,10 +48,11 @@
def requestResources(self, key, specs, quantity):
vcm = self.__key2vcm(key)
- node = specs
+ #node = specs
''' Check for keys later '''
self.log.info("VCM_REQUEST_RESOURCE: VCM %s RESOURCE %s(%s)" % (vcm, specs, quantity))
- # go to scheduler val = self.agent.requestResource(specs)
+ # go to scheduler
+ val = self.agent.requestResource(specs)
if val:
return 1
return 0
diff --git a/src/zoni/services/zonimanager.py b/src/zoni/services/zonimanager.py
index c43d05d..0eff740 100755
--- a/src/zoni/services/zonimanager.py
+++ b/src/zoni/services/zonimanager.py
@@ -20,18 +20,11 @@
#
import os
-import sys
-import threading
-import signal
import logging.config
-import signal
-from tashi.util import instantiateImplementation, signalHandler
+from tashi.util import instantiateImplementation
-from zoni.extra.util import loadConfigFile, getConfig, debugConsole
-from zoni.version import *
-from zoni.services.hardwareservice import HardwareService
-from zoni.services.pcvciservice import pcmService
+from zoni.extra.util import getConfig, debugConsole
from zoni.services.rpycservices import ManagerService
from rpyc.utils.server import ThreadedServer
diff --git a/src/zoni/version.py b/src/zoni/version.py
index ea515d5..b34fd14 100644
--- a/src/zoni/version.py
+++ b/src/zoni/version.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-id = "$Id: version.py 964467 2010-07-15 15:31:02Z rgass $"
+_id = "$Id: version.py 964467 2010-07-15 15:31:02Z rgass $"
lastChangeDate = "$LastChangedDate$"
lastChangeRevision = "$Rev: 964467 $"
revision = lastChangeRevision