| #!/usr/bin/env python |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| """ |
| hawq checkperf -- Check the hardware for Greenplum Database |
| |
| Usage: hawq checkperf <options> |
| |
| --version : print version information |
| -d segdir : where segdir is a directory on which to test I/O performance. |
| Multiple -d flags may be specified for multiple directories |
| on each host. These will typically be your Greenplum primary |
| (and/or mirror) data directories. |
| e.g. -d /dbfast1 -d /dbfast2 |
| -? : print this help text |
| -v : enable verbose |
| -V : enable very verbose |
| -D : print per host disk stats on disk read/write tests |
| -r flags : where flags specifies the tests to run [default=dsN]. |
| Supported test flags are: |
| d: runs only disk tests |
| s: runs only stream benchmark |
| n: runs only netperf tests (serial) |
| N: runs only netperf tests (parallel) |
| M: runs only netperf tests (full matrix) |
| -B size : where size specifies the block size for disk performance tests |
| [default=32KB] e.g. 1KB, 4MB |
| -S size : where size specifies the file size for disk performance tests |
| [default=2X Memory] e.g. 500MB, 16GB |
| -h host : the host to connect to (multiple -h is okay) |
| -f file : a file listing all hosts to connect to |
| --duration : how long to run network test (default 15 seconds) |
| --netperf : use netperf instead of gpnetbenchServer/gpnetbenchClient |
| """ |
| |
| import os, sys |
| |
| sys.path.append(sys.path[0] + '/lib') |
| try: |
| import getopt, math, StringIO, stat, subprocess, signal |
| from gppylib.gpparseopts import OptParser |
| from gppylib.util import ssh_utils |
| from gppylib.gpcoverage import GpCoverage |
| except ImportError, e: |
| sys.exit('Error: unable to import module: ' + str(e)) |
| |
| |
# Installation root: the gpssh and gpscp helpers are invoked as
# $GPHOME/bin/gpssh and $GPHOME/bin/gpscp.
GPHOME = os.getenv('GPHOME')
if GPHOME is None:
    sys.exit('Please set GPHOME environment variable')

# USER must be set and non-blank.  The check compares by value: the former
# identity test (USER is ' ') could never match a string read from the
# environment, so blank values slipped through.
USER = os.getenv('USER')
if USER is None or not USER.strip():
    sys.exit('Please set USER environment variable')
| |
| ################ |
def usage(exitarg):
    """Print the help text and terminate the program.

    exitarg is passed unchanged to sys.exit(); callers in this file pass
    either 0 or an error message string.  If the OptParser help cannot be
    printed for any reason, the module docstring is printed instead.
    """
    help_parser = OptParser()
    try:
        help_parser.print_help()
    except:
        # Any failure while rendering help falls back to the docstring.
        print(__doc__)
    sys.exit(exitarg)
| |
| |
class Global():
    """Holder for the script name and the parsed command-line options."""

    # File name of this script, without its directory part.
    script_name = os.path.basename(__file__)

    # Option table keyed by switch; parseCommandLine() overwrites the
    # defaults below.
    opt = {
        '-d': [],                            # directories to test
        '-D': False,                         # per-host disk stats
        '-v': False,                         # verbose
        '-V': False,                         # very verbose
        '-r': '',                            # test selection flags
        '-B': 1024 * 32,                     # disk test block size in bytes
        '-S': 0,                             # disk test file size (0 = derive later)
        '-h': [],                            # hosts given on the command line
        '-f': None,                          # host file
        '--duration': 15,                    # default to 15s
        '--net': None,                       # selected network test mode
        '--netserver': 'gpnetbenchServer',   # server-side benchmark binary
        '--netclient': 'gpnetbenchClient',   # client-side benchmark binary
    }


GV = Global()
| |
| ############# |
def killall(procname):
    """Return a shell snippet that tries to kill processes named procname.

    The snippet tries pkill, then pkill -f, then killall -9, discards all
    output and always evaluates to success (trailing "|| true").
    """
    attempts = 'pkill $F || pkill -f $F || killall -9 $F'
    return 'F=%s && (%s) > /dev/null 2>&1 || true' % (procname, attempts)
| |
| ############################### |
def strcmd(cmd):
    """Render an argv-style list of strings as one printable line.

    Arguments containing a space are wrapped in single quotes so the
    logged command stays readable.  This is for display only; it does
    not perform shell escaping.

    Fixes over the previous version: an argument whose first character
    is a space is now quoted too, and an empty list yields '' instead of
    raising TypeError.
    """
    return ' '.join([("'" + arg + "'") if ' ' in arg else arg for arg in cmd])
| |
| ############# |
def gpssh(cmd):
    """Run a shell command on the target hosts via $GPHOME/bin/gpssh.

    Hosts come from the -f host file when one was given, otherwise from
    the -h list.  Returns (ok, out) where ok is True when gpssh exited
    with status 0 and out is its combined stdout and stderr.
    """
    argv = ['%s/bin/gpssh' % GPHOME]
    if GV.opt['-V']:
        argv.append('-v')
    if GV.opt['-f']:
        argv.extend(['-f', GV.opt['-f']])
    else:
        for host in GV.opt['-h']:
            argv.extend(['-h', host])
    argv.append(cmd)

    if GV.opt['-v']:
        print('[Info] ' + strcmd(argv))
    child = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out = child.stdout.read(-1)
    status = child.wait()
    return (not status, out)
| |
| ############# |
def gpscp(src, dst):
    """Copy src to dst on the target hosts via $GPHOME/bin/gpscp.

    Hosts come from the -f host file when one was given, otherwise from
    the -h list.  Returns (ok, out) where ok is True when gpscp exited
    with status 0 and out is its captured stdout (stderr is not
    captured).
    """
    argv = ['%s/bin/gpscp' % GPHOME]
    if GV.opt['-V']:
        argv.append('-v')
    if GV.opt['-f']:
        argv.extend(['-f', GV.opt['-f']])
    else:
        for host in GV.opt['-h']:
            argv.extend(['-h', host])
    argv.extend([src, dst])

    if GV.opt['-v']:
        print('[Info] ' + strcmd(argv))
    child = subprocess.Popen(argv, stdout=subprocess.PIPE)
    out = child.stdout.read(-1)
    status = child.wait()
    return (not status, out)
| |
| ################ |
def getPlatform():
    """Return 'linux', 'darwin' or 'sunos5' based on sys.platform.

    The first of those names found inside sys.platform wins; '?' is
    returned when none matches.
    """
    for name in ('linux', 'darwin', 'sunos5'):
        if name in sys.platform:
            return name
    return '?'
| |
| ################ |
def getMemory():
    """Return the local machine's physical memory in bytes, or 0 if unknown.

    linux:  reads the MemTotal line of /proc/meminfo (value expected in kB).
    darwin: reads "sysctl hw.physmem" and returns the number as printed.
    sunos5: reads the Memory line of prtconf (value expected in Megabytes).
    A failed command, an unexpected unit or an unrecognized platform
    yields 0.
    """
    platform = getPlatform()

    if platform == 'linux':
        ok, out = run("sh -c 'cat /proc/meminfo | grep MemTotal'")
        if not ok:
            return 0
        fields = out.strip().split(' ')
        # The last two fields are the amount and its unit.
        amount = int(fields[len(fields) - 2])
        unit = fields[len(fields) - 1]
        if unit == 'kB':
            return amount * 1024
        return 0

    if platform == 'darwin':
        ok, out = run("/usr/sbin/sysctl hw.physmem")
        if not ok:
            return 0
        fields = out.strip().split(' ')
        return int(fields[1])

    if platform == 'sunos5':
        ok, out = run("sh -c \"/usr/sbin/prtconf | awk '/^Memory/{print}'\"")
        if not ok:
            return 0
        fields = out.strip().split(' ')
        amount = int(fields[2])
        unit = fields[3]
        if unit == 'Megabytes':
            return amount * 1024 * 1024
        return 0

    return 0
| |
| ################ |
def parseMemorySize(line):
    """Convert a size string such as '32KB', '4M' or '16gb' to bytes.

    Surrounding whitespace and letter case are ignored.  An optional
    trailing 'B' is dropped, then an optional K/M/G suffix selects a
    power-of-1024 multiplier.  A string without a suffix is taken as a
    plain byte count.

    Returns the size as an integer.  On any parse failure an error line
    is printed and the program exits with status -1.  Previously the
    error path itself raised TypeError because the exception object was
    concatenated to a string.
    """
    factor = 1
    try:
        line = line.strip().upper()
        if line.endswith('B'):
            line = line[:-1]
        if line.endswith('G'):
            factor = 1024 * 1024 * 1024
        elif line.endswith('M'):
            factor = 1024 * 1024
        elif line.endswith('K'):
            factor = 1024
        if factor > 1:
            # Drop the unit letter so only the number remains.
            line = line[:-1]
        return int(line) * factor
    except Exception as err:
        print('Error: ' + str(err))
        sys.exit(-1)
| |
| ############# |
def parseDuration(val):
    """Convert a duration string such as '15', '10s', '2m', '1h' or '1d' to seconds.

    Whitespace and letter case are ignored.  A trailing 'S' is dropped
    first; a trailing M, H or D then multiplies the number by 60, 3600 or
    86400.  On any parse failure a message is printed and the program
    exits with status -1.
    """
    unit_seconds = {'M': 60, 'H': 3600, 'D': 86400}
    try:
        text = val.strip().upper()
        if text.endswith('S'):
            text = text[:-1]
        factor = unit_seconds.get(text[-1:], 1)
        if factor > 1:
            text = text[:-1]
        return int(text) * factor
    except Exception as err:
        print("exception: %s" % err)
        sys.exit(-1)
| |
| |
| ############# |
def print_version():
    """Print the script name with its revision tag and exit with status 0."""
    banner = '%s version $Revision$' % GV.script_name
    print(banner)
    sys.exit(0)
| |
| ############# |
def parseCommandLine():
    """Parse sys.argv into GV.opt, validate it, and echo the command line.

    Any validation failure ends the program through usage().  After a
    successful return:
      * GV.opt['-r'] is non-empty ('dsN' when not given);
      * GV.opt['--net'] is 'netperf', 'parallel', 'matrix' or None;
      * GV.opt['-S'] holds the file size divided by the number of -d
        directories (twice getMemory() is used when -S was not given);
      * every -d directory has one trailing '/' removed.
    """
    try:
        (options, args) = getopt.getopt(sys.argv[1:], '?vVDd:r:B:S:p:h:f:',
                                        ['duration=', 'version', 'netperf'])
    except Exception as e:
        usage('Error: ' + str(e))

    for (switch, val) in options:
        flag = switch[1]
        if switch == '-?':
            usage(0)
        elif flag in 'vVD':
            GV.opt[switch] = True
        elif flag in 'dh':
            GV.opt[switch].append(val)
        elif flag in 'rf':
            GV.opt[switch] = val
        elif flag in 'BS':
            GV.opt[switch] = parseMemorySize(val)
        elif switch == '--duration':
            GV.opt[switch] = parseDuration(val)
        elif switch == '--version':
            print_version()
        elif switch == '--netperf':
            GV.opt['--netserver'] = 'netserver'
            GV.opt['--netclient'] = 'netperf'

    # run default tests (if not specified)
    if GV.opt['-r'] == '':
        GV.opt['-r'] = 'dsN'

    # parse netperf test type, error if more than one specified
    requested = [mode for (letter, mode) in (('n', 'netperf'),
                                             ('N', 'parallel'),
                                             ('M', 'matrix'))
                 if letter in GV.opt['-r']]
    if requested:
        GV.opt['--net'] = requested[-1]
    if len(requested) > 1:
        usage('Error: please only specify one network test at a time (-r [n|M|N])')

    # very verbose implies verbose
    if GV.opt['-V']:
        GV.opt['-v'] = True

    if not GV.opt['-d']:
        usage('Error: please specify at least one segdir (-d switch)')

    if GV.opt['-B'] < 0 or GV.opt['-S'] < 0:
        usage('Error: please enter valid memory sizes for -B or -S switches')

    # exactly one of -h / -f must have been supplied
    if bool(GV.opt['-h']) == bool(GV.opt['-f']):
        usage('Error: please specify at least one of -h or -f args, but not both')

    if GV.opt['-B'] > 1024 * 1024:
        usage('Error: maximum size for -B parameter is 1MB')

    # the total file size is shared between the -d directories
    if GV.opt['-S'] == 0:
        GV.opt['-S'] = 2 * getMemory() / len(GV.opt['-d'])
    else:
        GV.opt['-S'] /= len(GV.opt['-d'])

    if GV.opt['--duration'] <= 0:
        GV.opt['--duration'] = 15
        print('[INFO] Invalid network duration specified. Using default (15 seconds)')

    # strip the last '/' from the dir
    GV.opt['-d'] = [d[:-1] if d.endswith('/') else d for d in GV.opt['-d']]

    print(strcmd(sys.argv))
| |
| |
| ############# |
def run(cmd):
    """Run cmd locally through os.popen and capture its stdout.

    Returns (ok, out): ok is True when closing the pipe reported no
    error status, out is the text read from the command.  If the output
    contains both 'Permission denied' and 'publickey', a hint to use
    gpssh-exkeys is printed and the program exits with status 1.
    """
    pipe = None
    ok = False
    out = ''
    try:
        if GV.opt['-v']:
            print('[Info] %s' % cmd)
        pipe = os.popen(cmd)
        out = pipe.read()
        ok = not pipe.close()
        pipe = None
    finally:
        # Only still set when an exception cut the sequence above short.
        if pipe:
            pipe.close()

    if GV.opt['-v'] and out:
        print(out)

    if out and 'Permission denied' in out and 'publickey' in out:
        if not GV.opt['-v']:
            print(out)
        print('Please use gpssh-exkeys to exchange keys ...')
        sys.exit(1)

    return (ok, out)
| |
| ############# |
# Set once runTeardown() has completed, so later calls do nothing.
runTeardown_torndown = False


def runTeardown():
    """Remove the per-user work directories on all hosts and stop the net server.

    Best effort: failures of either remote command are ignored.  Only
    the first call does any work.
    """
    global runTeardown_torndown
    if runTeardown_torndown:
        return

    if GV.opt['-v']:
        print('--------------------')
        print(' TEARDOWN')
        print('--------------------')

    dirs = ''.join([' %s/gpcheckperf_$USER' % d for d in GV.opt['-d']])
    try:
        gpssh('rm -rf ' + dirs)
    except:
        pass

    try:
        # A net server is only killed when a network test was selected.
        if GV.opt['--net']:
            gpssh(killall(GV.opt['--netserver']))
    except:
        pass

    runTeardown_torndown = True
| |
| ############# |
def runSetup():
    """Verify python is reachable on all hosts and create fresh work directories.

    For every -d directory a <dir>/gpcheckperf_$USER directory is removed
    and re-created on each host.  Exits through sys.exit() with an error
    message when a remote step fails.  runTeardown() is called before
    any exception, including that exit, propagates.
    """
    verbose = GV.opt['-v']
    if verbose:
        print('--------------------')
        print(' SETUP')
        print('--------------------')
    try:
        # check python reachable
        if verbose:
            print('[Info] verify python interpreter exists')
        (ok, out) = gpssh('python -c print')
        if not ok:
            if not verbose:
                print(out)
            sys.exit("[Error] unable to find python interpreter on some hosts\n"
                     + " verify PATH variables on the hosts")

        # mkdir cperf
        if verbose:
            print('[Info] making hawq checkperf directory on all hosts ... ')
        dirs = ''.join([' %s/gpcheckperf_$USER' % d for d in GV.opt['-d']])

        cmd = 'rm -rf %s ; mkdir -p %s' % (dirs, dirs)
        (ok, out) = gpssh(cmd)
        if not ok:
            sys.exit("[Error] unable to make hawq checkperf directory. \n"
                     + " command failed: " + cmd)
    except:
        runTeardown()
        raise
| |
| ################ |
def copyExecOver(fname):
    """Copy the local helper binary lib/<fname> to every host and make it executable.

    The binary is taken from <script dir>/lib and copied into the work
    directory under the first -d directory.  Returns the remote path
    (which still contains the literal $USER placeholder).  Exits the
    program when the local file is missing, not a regular executable
    file, or a remote step fails.
    """
    local = '%s/lib/%s' % (sys.path[0], fname)
    # copy to this dir
    target = '%s/gpcheckperf_$USER/%s' % (GV.opt['-d'][0], fname)

    if not os.path.isfile(local):
        print("binary file not found: %s" % local)
        sys.exit(1)

    if GV.opt['-v']:
        print('[Info] copy local %s to remote %s' % (local, target))

    try:
        info = os.stat(local)
    except:
        sys.exit('[Error] cannot stat file ' + local)
    if not stat.S_ISREG(info.st_mode):
        sys.exit('[Error] invalid file ' + local)
    if not os.access(local, os.X_OK):
        sys.exit('[Exit] file not executable: ' + local)

    (ok, out) = gpscp(local, '=:%s' % target)
    if not ok:
        sys.exit('[Error] command failed: gpscp %s =:%s' % (local, target))

    # chmod +x file
    (ok, out) = gpssh('chmod a+rx %s' % target)
    if not ok:
        sys.exit('[Error] command failed: chmod a+rx %s' % target)

    return target
| |
| |
| ############# |
def parseMultiDDResult(out):
    """Parse gpssh output of "time -p multidd ..." into per-host results.

    Each relevant line looks like "[host] <text>".  Two kinds of text
    are used:
      "multidd total bytes <n>"  -- byte count reported for that host
      "real <seconds>"           -- elapsed time printed by "time -p"

    Returns a dict mapping host -> (MB_per_sec, seconds, bytes).

    The byte count is remembered per host, so results stay correct even
    when lines from different hosts are interleaved (a single shared
    counter was used before).  Lines that do not parse are skipped
    instead of aborting the run.
    """
    result = {}
    bytes_by_host = {}
    for line in out.splitlines():
        close = line.find(']')
        if close == -1:
            continue
        host = line[1:close]
        text = line[close + 2:]

        if text.startswith('multidd total bytes '):
            try:
                bytes_by_host[host] = int(text.split()[-1])
            except ValueError:
                pass
            continue

        if text.startswith('real'):
            try:
                elapsed = float(text.split()[1])
            except (IndexError, ValueError):
                continue
            if elapsed < 0.1:
                elapsed = 0.1  # avoid division by zero
            nbytes = bytes_by_host.get(host, 0)
            mbps = nbytes / elapsed / 1024 / 1024
            result[host] = (mbps, elapsed, nbytes)

    return result
| |
| |
| |
| ############# |
def runDiskWriteTest(multidd):
    """Run the disk write test on all hosts and return the parsed results.

    multidd is the remote path of the multidd binary.  It is run under
    "time -p" with one /dev/zero -> <dir>/gpcheckperf_$USER/ddfile pair
    per -d directory, plus -B / -S when those options are non-zero.
    Exits the program if the remote command fails.
    """
    print('')
    print('--------------------')
    print('-- DISK WRITE TEST')
    print('--------------------')

    pieces = ['time -p ' + multidd]
    for d in GV.opt['-d']:
        pieces.append(' -i /dev/zero -o %s/gpcheckperf_$USER/ddfile' % d)
    if GV.opt['-B']:
        pieces.append(' -B %d' % GV.opt['-B'])
    if GV.opt['-S']:
        pieces.append(' -S %d' % GV.opt['-S'])
    cmd = ''.join(pieces)

    (ok, out) = gpssh(cmd)
    if not ok:
        sys.exit('[Error] command failed: ' + cmd)
    return parseMultiDDResult(out)
| |
| |
| ############# |
def runDiskReadTest(multidd):
    """Run the disk read test on all hosts and return the parsed results.

    multidd is the remote path of the multidd binary.  It is run under
    "time -p" with one <dir>/gpcheckperf_$USER/ddfile -> /dev/null pair
    per -d directory, plus -B / -S when those options are non-zero.
    Exits the program if the remote command fails.
    """
    print('')
    print('--------------------')
    print('-- DISK READ TEST')
    print('--------------------')

    pieces = ['time -p ' + multidd]
    for d in GV.opt['-d']:
        pieces.append(' -o /dev/null -i %s/gpcheckperf_$USER/ddfile' % d)
    if GV.opt['-B']:
        pieces.append(' -B %d' % GV.opt['-B'])
    if GV.opt['-S']:
        pieces.append(' -S %d' % GV.opt['-S'])
    cmd = ''.join(pieces)

    (ok, out) = gpssh(cmd)
    if not ok:
        sys.exit('[Error] command failed: ' + cmd)
    return parseMultiDDResult(out)
| |
| |
| ############# |
def runStreamTest():
    """Copy the 'stream' binary to all hosts, run it, and collect the Copy figures.

    Returns a dict mapping host -> (copy_value, 0, 0), taken from each
    host's "Copy:" output line, in the same tuple shape printResult()
    consumes.  Exits the program if the remote command fails.
    """
    print('')
    print('--------------------')
    print('-- STREAM TEST')
    print('--------------------')

    rmtPath = copyExecOver('stream')
    (ok, out) = gpssh(rmtPath)
    if not ok:
        sys.exit('[Error] command failed: ' + rmtPath)

    result = {}
    for line in StringIO.StringIO(out):
        close = line.find(']')
        payload = line[close + 2:]
        if not payload.startswith('Copy:'):
            continue
        # The host name sits between the leading '[' and the ']'.
        result[line[1:close]] = (float(payload.split()[1]), 0, 0)
    return result
| |
| |
| ############# |
def startNetServer():
    """Copy the net server binary to all hosts and start it.

    Up to five attempts are made, starting at port 23000 and moving up
    by 12 after each failure.  Any running server is killed before every
    attempt.  Returns the port that worked, or 0 when all attempts
    failed.
    """
    rmtPath = copyExecOver(GV.opt['--netserver'])

    for attempt in xrange(5):
        port = 23000 + 12 * attempt
        if attempt > 0:
            print('[Warning] retrying with port %d' % port)
        gpssh(killall(GV.opt['--netserver']))

        (ok, out) = gpssh('%s -p %d > /dev/null 2>&1' % (rmtPath, port))
        if ok:
            return port

        if GV.opt['-v']:
            print('[Warning] start netserver with port %d failed' % port)

    return 0
| |
| ############# |
def killProc(proc):
    """Send SIGKILL to a subprocess and return its exit status.

    A falsy proc (e.g. None) is ignored and 0 is returned.  An OSError
    from os.kill is swallowed; the process is still waited for.
    """
    if not proc:
        return 0
    try:
        os.kill(proc.pid, signal.SIGKILL)
    except OSError:
        pass
    return proc.wait()
| |
| |
| ############# |
def spawnNetperfTestBetween(x, y, netperf_path, netserver_port, sec=5):
    """Start a TCP_STREAM client on host x aimed at host y, without waiting.

    The client at netperf_path is launched over ssh on x and connects to
    y on netserver_port for sec seconds.  Returns (proc, x, y), where
    proc is the ssh subprocess whose stdout is piped, or
    (None, None, None) if no process object was obtained.
    """
    remote_cmd = ('%s -H %s -p %d -t TCP_STREAM -l %s -f M -P 0 '
                  % (netperf_path, y, netserver_port, sec))
    argv = ['ssh', '-o', 'BatchMode yes',
            '-o', 'StrictHostKeyChecking no',
            x, remote_cmd]
    proc = None
    try:
        if GV.opt['-v']:
            print('[Info] ' + strcmd(argv))
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    except KeyboardInterrupt:
        killProc(proc)
        raise

    if proc:
        return (proc, x, y)

    print('[Warning] netperf failed on %s -> %s' % (x, y))
    return (None, None, None)
| |
| |
| ############# |
def reapNetperfTest(proc, x, y):
    """Wait for a client started by spawnNetperfTestBetween and parse its result.

    proc is the ssh subprocess; x and y are the source and destination
    hosts, used for messages and copied into the result.

    Returns [x, y, f1, f2, f3, f4, f5], where f1..f5 are the five
    numbers of the first output line made of exactly five numeric
    fields, or [] when the test failed or no such line was found.
    Exits the program with a gpssh-exkeys hint when the output shows an
    ssh public-key failure.

    Changes from the previous version:
      * output is collected with communicate(), which drains the pipe
        while waiting; calling wait() before reading can block forever
        once the pipe buffer fills;
      * the process is no longer sent SIGKILL after it has already been
        reaped (the pid may belong to another process by then); the exit
        status is taken from proc.returncode instead;
      * a five-field line that is not numeric is skipped instead of
        raising ValueError.
    """
    ok = True
    out = None
    if proc:
        try:
            out = proc.communicate()[0]
            ok = (proc.returncode == 0)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Best effort, as before: an unreadable result is reported
            # below as an invalid result rather than aborting the run.
            pass

    if GV.opt['-v'] and out:
        print('[Info] %s' % out)
    if out and 'Permission denied' in out and 'publickey' in out:
        if not GV.opt['-v']:
            print(out)
        print('[Error] Please use gpssh-exkeys to exchange keys ...')
        sys.exit(1)

    if not ok:
        print('[Warning] netperf failed on %s -> %s' % (x, y))
        return []

    for line in (out or '').splitlines():
        fields = line.split()
        if len(fields) != 5:
            continue
        try:
            numbers = [float(field) for field in fields]
        except ValueError:
            continue
        if GV.opt['-v']:
            print('[Info] %s -> %s : %s' % (x, y, str(fields)))
        return [x, y] + numbers

    print('[Warning] netperf failed on %s -> %s (invalid result)' % (x, y))
    print('[Info] %s' % out)
    return []
| |
| |
| |
| ############# |
def runNetperfTestBetween(x, y, netperf_path, netserver_port):
    """Run one network test from host x to host y and wait for its result.

    Uses the configured --duration.  Returns the list produced by
    reapNetperfTest(), or [] when the client could not be started.
    """
    spawned = spawnNetperfTestBetween(x, y, netperf_path, netserver_port,
                                      GV.opt['--duration'])
    if not spawned[0]:
        return []
    return reapNetperfTest(*spawned)
| |
| |
| ############# |
def setupNetPerfTest():
    """Prepare a network test: resolve hosts, start the server, copy the client.

    Returns (client_path, hosts, port).  Returns (None, None, None) when
    only one host is configured or the server could not be started.
    Ends the program through usage() when no hosts are configured.
    """
    print('')
    print('-------------------')
    print('-- NETPERF TEST')
    print('-------------------')

    collector = ssh_utils.HostList()
    for name in GV.opt['-h']:
        collector.add(name)
    if GV.opt['-f']:
        collector.parseFile(GV.opt['-f'])

    hosts = collector.get()
    if len(hosts) == 0:
        usage('Error: missing hosts in -h and/or -f arguments')

    if len(hosts) == 1:
        print('[Warning] single host only - abandon netperf test')
        return (None, None, None)

    # start netserver
    netserver_port = startNetServer()
    if netserver_port == 0:
        print('[Error] unable to start netserver ... abort netperf test')
        return (None, None, None)

    netperf = copyExecOver(GV.opt['--netclient'])
    return (netperf, hosts, netserver_port)
| |
| |
| ############# |
def runNetPerfTest():
    """Run the serial network test and return the list of per-link results.

    Hosts are paired as (0,1), (2,3), ... and every pair is measured in
    both directions, one test at a time.  With an odd number of hosts
    the last one is paired with host 0.  Returns None when the test
    could not be set up.
    """
    (netperf, hostlist, netserver_port) = setupNetPerfTest()
    if not netperf:
        return None

    # make sure we have even number of hosts
    hosts = hostlist[:]
    odd = None
    if len(hosts) % 2 == 1:
        odd = hosts.pop()  # take one out

    result = []

    def both_ways(a, b):
        # Measure a -> b, then b -> a, keeping only complete results.
        for (src, dst) in ((a, b), (b, a)):
            res = runNetperfTestBetween(src, dst, netperf, netserver_port)
            if res and len(res) >= 6:
                result.append(res)

    # run netperf test between host[0] & host[1], host[2] & host[3], ...
    for i in xrange(1, len(hosts), 2):
        both_ways(hosts[i - 1], hosts[i])

    # run netperf test between the odd host and host 0
    if odd:
        both_ways(odd, hosts[0])

    return result
| |
| |
| ############# |
def runNetPerfTestParallel():
    """Run the parallel network test and return the list of per-link results.

    Hosts are paired as (0,1), (2,3), ...; with an odd host count host 0
    is appended so the last host has a partner.  All pairs are started
    together in the forward direction and reaped, then the same is done
    in the reverse direction.  Returns None when the test could not be
    set up.
    """
    (netperf, hostlist, netserver_port) = setupNetPerfTest()
    if not netperf:
        return None

    # make sure we have even number of hosts
    hosts = hostlist[:]
    if len(hosts) % 2 == 1:
        hosts.append(hosts[0])

    result = []
    # first round: host[0] -> host[1], host[2] -> host[3], ...
    # second round: host[1] -> host[0], host[3] -> host[2], ...
    for forward in (True, False):
        procList = []
        for i in xrange(1, len(hosts), 2):
            if forward:
                (src, dst) = (hosts[i - 1], hosts[i])
            else:
                (src, dst) = (hosts[i], hosts[i - 1])
            spawned = spawnNetperfTestBetween(src, dst, netperf, netserver_port,
                                              GV.opt['--duration'])
            if spawned[0]:
                procList.append(spawned)

        for (proc, src, dst) in procList:
            res = reapNetperfTest(proc, src, dst)
            if res and len(res) >= 6:
                result.append(res)

    return result
| |
| ############# |
def get_host_map(hostlist):
    '''
    Returns a seglist dictionary that maps segment name to hostname and
    a uniqhosts dictionary that maps each hostname to one segment name.

    For example:
    seglist =
        seglist['sdw1-1'] = apollo1
        seglist['sdw1-2'] = apollo1
        seglist['sdw2-1'] = apollo2
        seglist['sdw2-2'] = apollo2
    uniqhosts =
        uniqhosts['apollo1'] = 'sdw1-1'
        uniqhosts['apollo2'] = 'sdw2-1'

    hostlist is the list of names to resolve; each is contacted with
    "ssh <name> hostname".

    Raises Exception when the gpssh 'hostname' call fails.  An ssh
    failure for an individual name prints the error and exits with
    status -1.

    Changes from the previous version: the unreachable sys.exit after
    the raise is gone; gpssh output lines that are not "[name] host"
    are skipped instead of breaking the unpack; output is gathered with
    communicate() rather than wait()-then-read; and proc is initialised
    before the try so the KeyboardInterrupt handler cannot hit an
    unbound name.
    '''
    seglist = dict()    # segment list
    uniqhosts = dict()  # unique host list

    # get list of hostnames
    rc, out = gpssh('hostname')
    if not rc:
        raise Exception('Encountered error running hostname')

    # get unique hostname list
    for line in out.splitlines():
        fields = line.replace('[', '').replace(']', '').split()
        if len(fields) != 2:
            continue
        seg, host = fields
        uniqhosts[host] = seg

    # get list of segments associated with each host (can't use gpssh since it de-dupes hosts)
    for host in hostlist:
        cmd = ['ssh', '-o', 'BatchMode yes', '-o', 'StrictHostKeyChecking no', host, 'hostname']
        proc = None
        try:
            if GV.opt['-v']:
                print('[Info] ' + strcmd(cmd))
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            out = proc.communicate()[0]
            if proc.returncode:
                raise Exception('ssh error with the following command:\n%s' % cmd)
            seglist[host] = out.strip()
        except KeyboardInterrupt:
            killProc(proc)
            raise
        except Exception as err:
            print(err)
            sys.exit(-1)

    return seglist, uniqhosts
| |
| ############# |
def runNetPerfTestMatrix():
    '''
    Runs full matrix network test where each host connects with
    every link on every other host.

    Returns (result, seglist): the list of per-link performance results
    and a dictionary mapping segment_name to hostname.  When the test
    cannot be set up, (None, None) is returned so that callers
    unpacking two values keep working (a bare None was returned before,
    which made the unpack fail).
    '''
    (netperf, hostlist, netserver_port) = setupNetPerfTest()
    if not netperf:
        return (None, None)

    # dict() of seglist[segname] = hostname, uniqhosts[hostname] = 1 segment name
    seglist, uniqhosts = get_host_map(hostlist)

    # spawn netperf between all hosts, skipping links that stay on one host
    procList = []
    for host in uniqhosts:
        for segment in seglist:
            if seglist[segment] == host:
                continue
            p = spawnNetperfTestBetween(host, segment, netperf, netserver_port,
                                        GV.opt['--duration'])
            if p[0]:
                procList.append(p)

    result = []
    for (proc, srcHost, dstHost) in procList:
        res = reapNetperfTest(proc, srcHost, dstHost)
        if res and len(res) >= 6:
            result.append(res)

    return result, seglist
| |
| |
| ############# |
def printMatrixResult(result, seglist):
    """Print per-host Tx/Rx totals and a summary for the full matrix test.

    result is a list of entries as returned by reapNetperfTest(): index
    0 is the sending host, index 1 the receiving segment name and index
    6 the rate that is summed and summarised.  seglist maps segment
    name to hostname and is used to attribute received traffic to a
    host.

    Changes from the previous version: min is computed with the builtin
    (the former and/or reduction ignored a rate of 0.0); the median of
    an even number of links is the mean of the two middle values
    instead of the upper one; an empty result prints only the title
    instead of raising.
    """
    print('Full matrix netperf bandwidth test')
    if not result:
        return

    # sum up Rx/Tx rate for each host
    netTx = dict()
    netRx = dict()
    for h in result:
        rate = float(h[6])
        netTx[h[0]] = netTx.get(h[0], 0.0) + rate
        # netRx requires that we lookup the hostname for a given segment name
        rxHost = seglist[h[1]]
        netRx[rxHost] = netRx.get(rxHost, 0.0) + rate

    # print Tx rates
    print("\nPer host transfer rates")
    for host in netTx:
        print('%s Tx rate: %.2f' % (host, netTx[host]))
    print('')

    # print Rx rates
    print("Per host receive rates")
    for host in netRx:
        print('%s Rx rate: %.2f' % (host, netRx[host]))
    print('')

    # print per link results in verbose mode
    if GV.opt['-v']:
        for h in result:
            print('%s -> %s = %f' % (h[0], h[1], h[6]))

    rates = [float(h[6]) for h in result]
    total = sum(rates)
    ordered = sorted(rates)
    mid = len(ordered) // 2
    if len(ordered) % 2:
        median = ordered[mid]
    else:
        median = (ordered[mid - 1] + ordered[mid]) / 2.0

    print('')
    print('Summary:')
    print('sum = %.2f MB/sec' % total)
    print('min = %.2f MB/sec' % ordered[0])
    print('max = %.2f MB/sec' % ordered[-1])
    print('avg = %.2f MB/sec' % (total / len(rates)))
    print('median = %.2f MB/sec' % median)
    print('')
| |
| |
| ############# |
def printNetResult(result):
    """Print per-link rates, a summary, and warnings for slow links.

    result is a list of entries as returned by reapNetperfTest(): index
    0 is the source host, index 1 the destination host and index 6 the
    rate that is reported.  A link is flagged when its rate is below
    90% of the fastest link.

    Changes from the previous version: min is computed with the builtin
    (the former and/or reduction ignored a rate of 0.0); the median of
    an even number of links is the mean of the two middle values
    instead of the upper one; an empty result prints only the title
    instead of raising.
    """
    print('Netperf bisection bandwidth test')
    if not result:
        return

    for h in result:
        print('%s -> %s = %f' % (h[0], h[1], h[6]))

    rates = [float(h[6]) for h in result]
    total = sum(rates)
    ordered = sorted(rates)
    fastest = ordered[-1]
    mid = len(ordered) // 2
    if len(ordered) % 2:
        median = ordered[mid]
    else:
        median = (ordered[mid - 1] + ordered[mid]) / 2.0

    print('')
    print('Summary:')
    print('sum = %.2f MB/sec' % total)
    print('min = %.2f MB/sec' % ordered[0])
    print('max = %.2f MB/sec' % fastest)
    print('avg = %.2f MB/sec' % (total / len(rates)))
    print('median = %.2f MB/sec' % median)
    print('')

    limit = fastest * 0.9
    for r in result:
        if r[6] < limit:
            print('[Warning] connection between %s and %s is no good' % (r[0], r[1]))
| |
| |
| |
| ################ |
def printResult(title, result):
    """Print totals and min/max bandwidth for one test.

    title is the label placed on every line.  result maps host ->
    (MB_per_sec, seconds, bytes).  The average time and total bytes
    lines are printed only when their totals are positive.  Per-host
    lines are added when the -D option is set.
    """
    totTime = 0
    totBytes = 0
    totMBPS = 0
    slowest = 99999999
    fastest = 0
    minHost = maxHost = None
    for host in result:
        (mbps, secs, nbytes) = result[host]
        totTime += secs
        totBytes += nbytes
        totMBPS += mbps
        if slowest > mbps:
            slowest, minHost = mbps, host
        if fastest < mbps:
            fastest, maxHost = mbps, host

    avgTime = totTime / len(result)

    print('')
    if totTime > 0:
        print(' %s avg time (sec): %3.2f' % (title, avgTime))
    if totBytes > 0:
        print(' %s tot bytes: %d' % (title, totBytes))
    print(' %s tot bandwidth (MB/s): %3.2f' % (title, totMBPS))
    print(' %s min bandwidth (MB/s): %3.2f [%s]' % (title, slowest, minHost))
    print(' %s max bandwidth (MB/s): %3.2f [%s]' % (title, fastest, maxHost))
    if GV.opt['-D']:
        print(' -- per host bandwidth --')
        for host in result:
            (mbps, secs, nbytes) = result[host]
            print(' %s bandwidth (MB/s): %3.2f [%s]' % (title, mbps, host))
    print('')
| |
| |
| ################ |
| |
# Script body: parse options, set up the hosts, run the selected tests,
# print whatever results were collected, and clean up.
# NOTE(review): block nesting below was reconstructed from the statement
# order; confirm that the final runTeardown() belongs inside the inner
# "finally".
coverage = GpCoverage()
coverage.start()

try:
    parseCommandLine()
    runSetup()
    diskWriteResult = diskReadResult = streamResult = netResult = None
    tornDown = False  # not read anywhere in the visible code
    try:
        # 'd' in -r selects the disk write test followed by the read test
        if GV.opt['-r'].find('d') >= 0:
            multidd = copyExecOver('multidd')
            diskWriteResult = runDiskWriteTest(multidd)
            diskReadResult = runDiskReadTest(multidd)

        # 's' in -r selects the stream test
        if GV.opt['-r'].find('s') >= 0:
            streamResult = runStreamTest()

        # at most one network mode is set by parseCommandLine()
        if GV.opt['--net'] == 'netperf':
            netResult = runNetPerfTest()
        elif GV.opt['--net'] == 'parallel':
            netResult = runNetPerfTestParallel()
        elif GV.opt['--net'] == 'matrix':
            netResult, seglist = runNetPerfTestMatrix()

        runTeardown()

    finally:
        # Results gathered so far are printed even when a test aborted.
        print
        print '===================='
        print '== RESULT'
        print '===================='

        if diskWriteResult:
            printResult('disk write', diskWriteResult)

        if diskReadResult:
            printResult('disk read', diskReadResult)

        if streamResult:
            printResult('stream', streamResult)

        # seglist is only bound when the matrix test returned a result
        if netResult and GV.opt['--net'] == 'matrix':
            printMatrixResult(netResult, seglist)
        elif netResult and GV.opt['--net']:
            printNetResult(netResult)

        # Does nothing if the teardown above already ran.
        runTeardown()

except KeyboardInterrupt:
    print '[Abort] Keyboard Interrupt ...'
finally:
    coverage.stop()
    coverage.generate_report()
| |