| #!/usr/bin/env python |
| |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| """ |
| usage: benchmark.py run <run_file> <levels> <spread> [N] |
| benchmark.py show <run_file> |
| benchmark.py compare <run_file1> <run_file2> |
| benchmark.py combine <new_file> <run_file1> <run_file2> ... |
| |
| Test data is written to run_file. |
| If a run_file exists, data is added to it. |
| <levels> is the number of directory levels to create |
| <spread> is the number of child trees spreading off each dir level |
| If <N> is provided, the run is repeated N times. |
| """ |
| |
| import os |
| import sys |
| import tempfile |
| import subprocess |
| import datetime |
| import random |
| import shutil |
| import cPickle |
| import optparse |
| import stat |
| |
# Key under which run() records the duration of one complete benchmark run.
TOTAL_RUN = 'TOTAL RUN'

# Module-wide Timings instance.  cmd_run() assigns it before calling run();
# svn() and run() read it as a global.
timings = None
| |
def run_cmd(cmd, stdin=None, shell=False):
    """Run an external command and capture its output.

    cmd is an argument list, or a single string when shell is true.
    stdin, if truthy, is fed to the process on its standard input.
    Returns the tuple (stdout, stderr) as produced by communicate().
    When the global options.verbose is set, the command line and any
    output are echoed.
    """
    ### options comes from the global namespace; it should be passed
    if shell:
        cmd_text = cmd
    else:
        cmd_text = ' '.join(cmd)
    printable_cmd = 'CMD: ' + cmd_text
    if options.verbose:
        print(printable_cmd)

    # Only open a pipe for stdin when there is something to send.
    stdin_arg = subprocess.PIPE if stdin else None

    proc = subprocess.Popen(cmd,
                            stdin=stdin_arg,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=shell)
    out, err = proc.communicate(input=stdin)

    if options.verbose:
        for label, text in (('STDOUT', out), ('STDERR', err)):
            if text:
                print("%s: [[[\n%s]]]" % (label, ''.join(text)))

    return out, err
| |
def timedelta_to_seconds(td):
    """Return the duration of the datetime.timedelta td as float seconds."""
    whole_seconds = float(td.seconds)
    fraction = float(td.microseconds) / (10**6)
    from_days = td.days * 24 * 60 * 60
    # Same summation order as before, so results are bit-for-bit identical.
    return whole_seconds + fraction + from_days
| |
| |
class Timings:
    """Collect durations (in seconds) grouped by operation name.

    Attributes:
      timings       dict mapping an operation name to a list of seconds.
      current_name  name of the operation currently being timed, or None.
      tic_at        datetime at which the current operation started, or None.
      ignore        tuple of operation names that tic() must not time.
      name          optional label used in reports (read_from_file() sets it
                    to the file's basename).

    Instances are stored with cPickle, so the attribute names above and the
    (old-style) class declaration are kept as they are to stay compatible
    with existing result files.
    """

    def __init__(self, *ignore_svn_cmds):
        """Create an empty collection; tic() skips the given names."""
        self.timings = {}
        self.current_name = None
        self.tic_at = None
        self.ignore = ignore_svn_cmds
        self.name = None

    def tic(self, name):
        """Start timing operation `name`, closing any timing in progress.

        Names listed in self.ignore are not timed at all.
        """
        if name in self.ignore:
            return
        self.toc()
        self.current_name = name
        self.tic_at = datetime.datetime.now()

    def toc(self):
        """Stop the timing in progress, if any, and record its duration."""
        if self.current_name and self.tic_at:
            toc_at = datetime.datetime.now()
            self.submit_timing(self.current_name,
                               timedelta_to_seconds(toc_at - self.tic_at))
        self.current_name = None
        self.tic_at = None

    def submit_timing(self, name, seconds):
        """Append `seconds` to the list of timings recorded for `name`."""
        self.timings.setdefault(name, []).append(seconds)

    def min_max_avg(self, name):
        """Return (minimum, maximum, average) of the timings for `name`.

        Raises KeyError when nothing has been recorded for `name`; this
        used to surface as an opaque TypeError from min(None).
        """
        recorded = self.timings.get(name)
        if not recorded:
            raise KeyError('no timings recorded for %r' % (name,))
        return (min(recorded),
                max(recorded),
                sum(recorded) / len(recorded))

    def summary(self):
        """Return a multi-line report with one row per recorded operation."""
        s = []
        if self.name:
            s.append('Timings for %s' % self.name)
        # NOTE(review): runs of spaces in this header and in the row format
        # may have been collapsed in this copy of the file -- confirm that
        # the columns still line up.
        s.append(' N min max avg operation (unit is seconds)')

        for name in sorted(self.timings.keys()):
            recorded = self.timings.get(name)
            if not name or not recorded:
                continue

            tmin, tmax, tavg = self.min_max_avg(name)
            s.append('%5d %7.2f %7.2f %7.2f %s' % (
                len(recorded), tmin, tmax, tavg, name))

        return '\n'.join(s)

    def compare_to(self, other):
        """Return a report comparing these timings with those of `other`.

        Every operation present in both collections gets one row holding,
        for min, max and avg, "factor|difference" where factor is
        self / other and difference is self - other (seconds).
        """
        def do_div(a, b):
            # A zero divisor yields factor 0.0 rather than an exception.
            if b:
                return float(a) / float(b)
            else:
                return 0.0

        def do_diff(a, b):
            return float(a) - float(b)

        def cell(mine, theirs):
            return '%7.2f|%+7.3f' % (do_div(mine, theirs),
                                     do_diff(mine, theirs))

        selfname = self.name
        if not selfname:
            selfname = 'unnamed'
        othername = other.name
        if not othername:
            othername = 'the other'

        s = ['COMPARE %s to %s' % (othername, selfname)]

        # Only look up the totals once we know both sides have them;
        # doing so up front crashed for collections without TOTAL_RUN.
        if TOTAL_RUN in self.timings and TOTAL_RUN in other.timings:
            selftotal = self.min_max_avg(TOTAL_RUN)[2]
            othertotal = other.min_max_avg(TOTAL_RUN)[2]
            s.append(' %s times: %5.1f seconds avg for %s' % (TOTAL_RUN,
                     othertotal, othername))
            s.append(' %s %5.1f seconds avg for %s' % (' ' * len(TOTAL_RUN),
                     selftotal, selfname))

        s.append(' min max avg operation')

        for name in sorted(self.timings.keys()):
            if not name in other.timings:
                continue

            min_me, max_me, avg_me = self.min_max_avg(name)
            min_other, max_other, avg_other = other.min_max_avg(name)

            s.append('%-16s %-16s %-16s %s' % (
                cell(min_me, min_other),
                cell(max_me, max_other),
                cell(avg_me, avg_other),
                name))

        # Use the fallback label here as well; self.name may be None.
        s.extend([
            '("1.23|+0.45" means factor=1.23, difference in seconds = 0.45',
            'factor < 1 or difference < 0 means \'%s\' is faster than \'%s\')'
            % (selfname, othername)])

        return '\n'.join(s)

    def add(self, other):
        """Merge all timings recorded in `other` into this collection."""
        for name, other_times in other.timings.items():
            self.timings.setdefault(name, []).extend(other_times)
| |
| |
| |
| |
# Short alias used throughout the file for joining path components.
j = os.path.join

# Number of names handed out by next_name() so far.
_create_count = 0


def next_name(prefix):
    """Return a fresh name of the form '<prefix>_<counter>'.

    The counter is shared by all prefixes and is incremented on every
    call, so no two returned names end in the same number.
    """
    global _create_count
    _create_count = _create_count + 1
    return prefix + '_' + str(_create_count)
| |
def create_tree(in_dir, levels, spread=5):
    """Create a tree of small files and subdirectories below in_dir.

    in_dir is created if it does not exist yet.  It receives `spread`
    files and, while levels > 1, `spread` subdirectories, each of which
    is populated the same way with one level less.

    Raises OSError if in_dir cannot be created and is not already a
    directory.
    """
    try:
        os.mkdir(in_dir)
    except OSError:
        # An already existing directory is fine; any other failure used to
        # be silently swallowed and only surfaced later in open().
        if not os.path.isdir(in_dir):
            raise

    for _ in range(spread):
        # files
        fn = j(in_dir, next_name('file'))
        f = open(fn, 'w')
        try:
            f.write('This is %s\n' % fn)
        finally:
            f.close()

        # dirs
        if levels > 1:
            dn = j(in_dir, next_name('dir'))
            create_tree(dn, levels - 1, spread)
| |
| |
def svn(*args):
    """Run the svn client with the given arguments and time the call.

    The first argument (normally the subcommand) is the name under which
    the duration is recorded in the global timings.  Returns the tuple
    (stdout, stderr), or (None, None) if running the executable raised
    OSError.
    """
    subcommand = args[0]

    ### options comes from the global namespace; it should be passed
    argv = [options.svn]
    argv.extend(args)
    if options.verbose:
        print('svn cmd: %s' % ' '.join(argv))

    ### timings comes from the global namespace; it should be passed
    timings.tic(subcommand)
    try:
        # Nothing is ever sent to svn's standard input.
        proc = subprocess.Popen(argv,
                                stdin=None,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=False)
        out, err = proc.communicate(input=None)
    except OSError:
        out = err = None
    finally:
        timings.toc()

    if options.verbose:
        if out:
            print("STDOUT: [[[\n%s]]]" % out)
        if err:
            print("STDERR: [[[\n%s]]]" % err)

    return out, err
| |
| |
def add(*args):
    """Run 'svn add' with the given arguments; returns what svn() returns."""
    return svn('add', *args)
| |
def ci(*args):
    """Run 'svn commit -mm' (log message "m") with the given arguments."""
    return svn('commit', '-mm', *args)
| |
def up(*args):
    """Run 'svn update' with the given arguments; returns what svn() returns."""
    return svn('update', *args)
| |
def st(*args):
    """Run 'svn status' with the given arguments; returns what svn() returns."""
    return svn('status', *args)
| |
# The lowercase letters 'a' through 'z' that randstr() draws from.
_chars = list(map(chr, range(ord('a'), ord('z') + 1)))


def randstr(len=8):
    """Return a string of `len` random lowercase letters.

    Letters are drawn with random.choice, so the result is reproducible
    after random.seed().
    """
    picked = []
    for _ in range(len):
        picked.append(random.choice(_chars))
    return ''.join(picked)
| |
def _copy(path):
    """Run 'svn copy' from path to a fresh '<path>_copied_<n>' name."""
    dest = next_name(path + '_copied')
    svn('copy', path, dest)
| |
def _move(path):
    """Run 'svn move' from path to '<path>_moved'."""
    dest = path + '_moved'
    svn('move', path, dest)
| |
def _propmod(path):
    """Modify, and possibly delete, one existing property of path.

    The property names are taken from the output of 'svn proplist',
    skipping its first line.  If there is at least one property, the
    middle one is set to a random value; if there is more than one, that
    same property is then deleted.  Does nothing when svn produced no
    output (svn() returns None when the executable could not be run).
    """
    so, se = svn('proplist', path)
    if not so:
        # Previously a None result crashed on so.strip().
        return

    propnames = [line.strip() for line in so.strip().split('\n')[1:]]
    if not propnames:
        return

    # Floor division keeps the index an integer on every Python version.
    middle = propnames[len(propnames) // 2]

    # modify?
    svn('ps', middle, randstr(), path)

    # del?
    if len(propnames) > 1:
        svn('propdel', middle, path)
| |
| |
def _propadd(path):
    """Set a property with a random name and a random value on path."""
    # set a new one.
    svn('propset', randstr(), randstr(), path)
| |
| |
def _mod(path):
    """Modify path: append a random line to a file, or change a
    property if path is a directory (see _propmod)."""
    if os.path.isdir(path):
        return _propmod(path)

    f = open(path, 'a')
    try:
        f.write('\n%s\n' % randstr())
    finally:
        # Close the handle even if the write fails.
        f.close()
| |
def _add(path):
    """Add a new child below directory path.

    A coin flip decides between 'svn mkdir' of a new directory and
    creating a new file with random content followed by 'svn add'.
    If path is a file, it is modified instead (see _mod).
    """
    if os.path.isfile(path):
        return _mod(path)

    if random.choice((True, False)):
        # create a dir
        svn('mkdir', j(path, next_name('new_dir')))
    else:
        # create a file
        new_path = j(path, next_name('new_file'))
        f = open(new_path, 'w')
        try:
            f.write(randstr())
        finally:
            # Close the handle even if the write fails.
            f.close()
        svn('add', new_path)
| |
def _del(path):
    """Run 'svn delete' on path."""
    svn('delete', path)
| |
# The modifications modify_tree() picks from at random.  _copy, _move and
# _del are currently left out.
_mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del)
| |
def modify_tree(in_dir, fraction):
    """Apply random modifications to roughly `fraction` of the tree.

    For every entry of in_dir whose name does not start with '.', one
    random number is drawn; if it is below `fraction`, a randomly chosen
    function from _mod_funcs is applied to that entry.  Afterwards the
    subdirectories that were listed up front are processed recursively
    (entries created by the modifications themselves are not visited).

    Entries are handled in sorted order.  os.listdir() returns names in
    arbitrary order, which would otherwise defeat the fixed random seed
    that run() sets to get identical modifications on every run.
    """
    child_names = [name for name in sorted(os.listdir(in_dir))
                   if not name.startswith('.')]

    for child_name in child_names:
        if random.random() < fraction:
            path = os.path.join(in_dir, child_name)
            random.choice(_mod_funcs)(path)

    for child_name in child_names:
        path = os.path.join(in_dir, child_name)
        if os.path.isdir(path):
            modify_tree(path, fraction)
| |
def propadd_tree(in_dir, fraction):
    """Add a random property to roughly `fraction` of the tree's entries.

    Walks in_dir recursively, skipping names that start with '.'.  For
    each entry one random number is drawn; if it is below `fraction`,
    _propadd() is applied to the entry.

    Entries are handled in sorted order.  os.listdir() returns names in
    arbitrary order, which would otherwise defeat the fixed random seed
    that run() sets to get identical modifications on every run.
    """
    for child_name in sorted(os.listdir(in_dir)):
        if child_name.startswith('.'):
            continue
        path = os.path.join(in_dir, child_name)
        if random.random() < fraction:
            _propadd(path)
        if os.path.isdir(path):
            propadd_tree(path, fraction)
| |
| |
def rmtree_onerror(func, path, exc_info):
    """Error handler for ``shutil.rmtree``.

    If path is not writable (e.g. a read-only file), write permission is
    added and the failed operation ``func`` is retried on it.

    Otherwise the exception currently being handled is re-raised; the
    handler therefore has to be called while that exception is active.

    Usage : ``shutil.rmtree(path, onerror=rmtree_onerror)``
    """
    if os.access(path, os.W_OK):
        # Not an access problem: let the original error propagate.
        raise

    os.chmod(path, stat.S_IWUSR)
    func(path)
| |
| |
def run(levels, spread, N):
    """Run the benchmark N times, recording into the global timings.

    Each iteration works in a fresh temporary directory that holds a
    repository ('repos') and two working copies ('wc', 'wc2'), and that
    is removed again at the end of the iteration.  A generated tree of
    `levels` directory levels with `spread` children per level is
    imported; then a fixed sequence of svn operations is executed.  The
    individual svn calls are timed by svn(); the duration of the whole
    sequence is recorded under TOTAL_RUN.  A summary is printed after
    each iteration.

    Exits the process with status 1 if 'svn --version' yields no output.
    """
    for i in range(N):
        base = tempfile.mkdtemp()

        # ensure identical modifications for every run
        random.seed(0)

        try:
            repos = j(base, 'repos')
            # URLs use forward slashes, whatever the platform's separator.
            repos = repos.replace('\\', '/')
            wc = j(base, 'wc')
            wc2 = j(base, 'wc2')

            # A path that does not start with '/' needs the extra slash
            # to form a file:/// URL.
            if repos.startswith('/'):
                file_url = 'file://%s' % repos
            else:
                file_url = 'file:///%s' % repos

            so, se = svn('--version')
            if not so:
                print "Can't find svn."
                exit(1)
            # The first two lines of the version output, joined.
            version = ', '.join([s.strip() for s in so.split('\n')[:2]])

            print '\nRunning svn benchmark in', base
            print 'dir levels: %s; new files and dirs per leaf: %s; run %d of %d' %(
                  levels, spread, i + 1, N)

            print version
            started = datetime.datetime.now()

            try:
                # Create the repository and check out the first working copy.
                run_cmd(['svnadmin', 'create', repos])
                svn('checkout', file_url, wc)

                # Generate the tree as 'trunk', add and commit it, then
                # add properties to part of it and commit again.
                trunk = j(wc, 'trunk')
                create_tree(trunk, levels, spread)
                add(trunk)
                st(wc)
                ci(wc)
                up(wc)
                propadd_tree(trunk, 0.5)
                ci(wc)
                up(wc)
                st(wc)

                trunk_url = file_url + '/trunk'
                branch_url = file_url + '/branch'

                # Copy trunk to 'branch' directly in the repository.
                svn('copy', '-mm', trunk_url, branch_url)
                st(wc)

                up(wc)
                st(wc)

                # Second working copy of trunk: modify and commit.
                svn('checkout', trunk_url, wc2)
                st(wc2)
                modify_tree(wc2, 0.5)
                st(wc2)
                ci(wc2)
                up(wc2)
                up(wc)

                # Switch the second working copy to the branch: modify
                # and commit there.
                svn('switch', branch_url, wc2)
                modify_tree(wc2, 0.5)
                st(wc2)
                ci(wc2)
                up(wc2)
                up(wc)

                # Modify trunk in the first working copy and commit.
                modify_tree(trunk, 0.5)
                st(wc)
                ci(wc)
                up(wc2)
                up(wc)

                # Merge trunk into the branch working copy, resolve and
                # commit.
                svn('merge', '--accept=postpone', trunk_url, wc2)
                st(wc2)
                svn('resolve', '--accept=mine-conflict', wc2)
                st(wc2)
                svn('resolved', '-R', wc2)
                st(wc2)
                ci(wc2)
                up(wc2)
                up(wc)

                # Reintegrate the branch into trunk, resolve and commit.
                svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk)
                st(wc)
                svn('resolve', '--accept=mine-conflict', wc)
                st(wc)
                svn('resolved', '-R', wc)
                st(wc)
                ci(wc)
                up(wc2)
                up(wc)

                # Delete the branch and commit.
                svn('delete', j(wc, 'branch'))
                ci(wc)
                up(wc2)
                up(wc)


            finally:
                stopped = datetime.datetime.now()
                print '\nDone with svn benchmark in', (stopped - started)

                ### timings comes from the global namespace; it should be passed
                timings.submit_timing(TOTAL_RUN,
                                      timedelta_to_seconds(stopped - started))

                # rename ps to prop mod
                if timings.timings.get('ps'):
                    has = timings.timings.get('prop mod')
                    if not has:
                        has = []
                        timings.timings['prop mod'] = has
                    has.extend( timings.timings['ps'] )
                    del timings.timings['ps']

                print timings.summary()
        finally:
            # Always remove the temporary directory, read-only files included.
            shutil.rmtree(base, onerror=rmtree_onerror)
| |
| |
def read_from_file(file_path):
    """Load a pickled object from file_path and label it.

    The object's `name` attribute is set to the basename of file_path.
    Returns the loaded object.
    """
    # NOTE(review): unpickling executes whatever the file tells it to;
    # only load result files that come from a trusted source.
    handle = open(file_path, 'rb')
    try:
        loaded = cPickle.load(handle)
    finally:
        handle.close()
    loaded.name = os.path.basename(file_path)
    return loaded
| |
| |
def write_to_file(file_path, instance):
    """Pickle instance into file_path, replacing any existing content."""
    f = open(file_path, 'wb')
    try:
        cPickle.dump(instance, f)
    finally:
        # Close the handle even if pickling fails, as read_from_file does.
        f.close()
| |
def cmd_compare(path1, path2):
    """Print the summaries of two result files, then the comparison of
    the second file's timings to the first's."""
    first = read_from_file(path1)
    second = read_from_file(path2)

    separator = '---'
    for section in (first.summary(), second.summary()):
        print(section)
        print(separator)
    print(second.compare_to(first))
| |
def cmd_combine(dest, *paths):
    """Merge the timings of all result files in paths, print the combined
    summary and write the result to dest."""
    combined = Timings('--version')

    for source_path in paths:
        combined.add(read_from_file(source_path))

    print(combined.summary())
    write_to_file(dest, combined)
| |
def cmd_run(timings_path, levels, spread, N=1):
    """Run the benchmark N times and store the timings in timings_path.

    levels, spread and N may be given as strings; they are converted
    with int().  If timings_path already exists, the new results are
    added to the ones stored there.
    """
    ### UGH! should pass to run()
    global timings

    levels, spread, N = int(levels), int(spread), int(N)

    print('\n\nHi, going to run a Subversion benchmark series of %d runs...' % N)

    if not os.path.isfile(timings_path):
        print('Going to write results to new file %s' % (timings_path,))
        timings = Timings('--version')
    else:
        print('Going to add results to existing file %s' % (timings_path,))
        timings = read_from_file(timings_path)

    run(levels, spread, N)

    write_to_file(timings_path, timings)
| |
def cmd_show(*paths):
    """Print the path and the timings summary of each result file."""
    for result_path in paths:
        loaded = read_from_file(result_path)
        print('---\n%s' % result_path)
        print(loaded.summary())
| |
| |
def usage():
    """Print the module docstring, which holds the usage message."""
    print __doc__
| |
if __name__ == '__main__':
    parser = optparse.OptionParser()
    # -h is automatically added.
    ### should probably expand the help for that. and see about -?
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      help='Verbose operation')
    parser.add_option('--svn', action='store', dest='svn', default='svn',
                      help='Specify Subversion executable to use')

    ### should start passing this, but for now: make it global
    # This block runs at module level, so `options` is a module global
    # without any `global` statement; run_cmd() and svn() read it.
    options, args = parser.parse_args()

    # there should be at least one arg left: the sub-command
    if not args:
        usage()
        sys.exit(1)

    cmd = args[0]
    del args[0]

    # sub-command -> (handler, minimum number of arguments,
    #                 maximum number of arguments or None for no limit)
    subcommands = {
        'compare': (cmd_compare, 2, 2),
        'combine': (cmd_combine, 3, None),
        'run': (cmd_run, 3, 4),
        'show': (cmd_show, 1, None),
    }

    if cmd not in subcommands:
        # An unknown sub-command is a usage error like any other, so it
        # gets a non-zero exit status too.
        usage()
        sys.exit(1)

    handler, min_args, max_args = subcommands[cmd]
    if len(args) < min_args or (max_args is not None and len(args) > max_args):
        usage()
        sys.exit(1)

    handler(*args)