tools/dev/benchmarks/suite1/benchmark.py - subversion - Git at Google

 #!/usr/bin/env python

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 """Usage: benchmark.py run|list|compare|show|chart <selection> ...

 SELECTING TIMINGS -- B@R,LxS

 In the subcommands below, a timings selection consists of a string with up to
 four elements:
   <branch>@<revision>,<levels>x<spread>
 abbreviated as:
   B@R,LxS

 <branch> is a label of an svn branch, e.g. "1.7.x".
 <revision> is the last-changed-revision of above branch.
 <levels> is the number of directory levels created in the benchmark.
 <spread> is the number of child trees spreading off each dir level.

 <branch> and <revision> are simply used for labeling. Upon the actual
 test runs, you should enter labels matching the selected --svn-bin-dir.
 Later, you can select runs individually by using these labels.

 For <revision>, you can provide special keywords:
 - 'each' has the same effect as entering each available revision number that
   is on record in the db in a separate timings selection.
 - 'last' is the same as 'each', but shows only the last 10 revisions. 'last'
   can be combined with a number, e.g. 'last12'.

 For all subcommands except 'run', you can omit some or all of the elements of
 a timings selection to combine all available timings sets. Try that out with
 the 'list' subcommand.

 Examples:
   benchmark.py run 1.7.x@12345,5x5
   benchmark.py show trunk@12345
   benchmark.py compare 1.7.0,1x100 trunk@each,1x100
   benchmark.py chart compare 1.7.0,5x5 trunk@last12,5x5


 RUN BENCHMARKS

   benchmark.py run B@R,LxS [N] [options]

 Test data is added to an sqlite database created automatically, by default
 'benchmark.db' in the current working directory. To specify a different path,
 use option -f <path_to_db>.

 If <N> is provided, the run is repeated N times.

 <levels> and <spread> control the way the tested working copy is structured:
   <levels>: number of directory levels to create.
   <spread>: number of files and subdirectories created in each dir.


 LIST WHAT IS ON RECORD

   benchmark.py list [B@R,LxS]

 Find entries in the database for the given constraints. Any arguments can
 be omitted. (To select only a rev, start with a '@', like '@123'; to select
 only spread, start with ',x', like ',x100'.)

 Call without arguments to get a listing of all available constraints.


 COMPARE TIMINGS

   benchmark.py compare B@R,LxS B@R,LxS [B@R,LxS [...]]

 Compare any number of timings sets to the first provided set (in text mode).
 For example:
   benchmark.py compare 1.7.0 trunk@1349903
     Compare the total timings of all combined '1.7.0' branch runs to
     all combined runs of 'trunk'-at-revision-1349903.
   benchmark.py compare 1.7.0,5x5 trunk@1349903,5x5
     Same as above, but only compare the working copy types with 5 levels
     and a spread of 5.

 Use the -c option to limit comparison to specific command names.


 SHOW TIMINGS

   benchmark.py show B@R,LxS [B@R,LxS [...]]

 Print out a summary of the timings selected from the given constraints.


 GENERATE CHARTS

   benchmark.py chart compare B@R,LxS B@R,LxS [ B@R,LxS ... ]

 Produce a bar chart that compares any number of sets of timings.  Like with
 the plain 'compare' command, the first set is taken as a reference point for
 100% and +-0 seconds. Each following dataset produces a set of labeled bar
 charts, grouped by svn command names. At least two timings sets must be
 provided.

 Use the -c option to limit comparison to specific command names.


 EXAMPLES

 # Run 3 benchmarks on svn 1.7.0 with 5 dir levels and 5 files and subdirs for
 # each level (spread). Timings are saved in ./benchmark.db.
 # Provide label '1.7.0' and its Last-Changed-Rev for later reference.
 ./benchmark.py run --svn-bin-dir ~/svn-prefix/1.7.0/bin 1.7.0@1181106,5x5 3

 # Record 3 benchmark runs on trunk, again naming its Last-Changed-Rev.
 # (You may also set your $PATH instead of using --svn-bin-dir.)
 ./benchmark.py run --svn-bin-dir ~/svn-prefix/trunk/bin trunk@1352725,5x5 3

 # Work with the results of above two runs
 ./benchmark.py list
 ./benchmark.py compare 1.7.0 trunk
 ./benchmark.py show 1.7.0 trunk
 ./benchmark.py chart compare 1.7.0 trunk
 ./benchmark.py chart compare 1.7.0 trunk -c "update,commit,TOTAL RUN"

 # Rebuild r1352598, run it and chart improvements since 1.7.0.
 svn up -r1352598 ~/src/trunk
 make -C ~/src/trunk dist-clean install
 export PATH="$HOME/svn-prefix/trunk/bin:$PATH"
 which svn
 ./benchmark.py run trunk@1352598,5x5 3
 ./benchmark.py chart compare 1.7.0 trunk@1352598 trunk@1352725 -o chart.svg


 GLOBAL OPTIONS"""

 import os
 import time
 import datetime
 import sqlite3
 import optparse
 import tempfile
 import subprocess
 import random
 import shutil
 import stat
 import string
 from copy import copy

 # Command names that Run.tic() declines to time.
 IGNORE_COMMANDS = ('--version', )
 # Label under which perform_run() records the duration of a whole run.
 TOTAL_RUN = 'TOTAL RUN'

 # Short alias for os.path.join.
 j = os.path.join

 def bail(msg=None):
   """Print MSG (if given and non-empty) and exit with status 1.

   sys.exit() is used instead of the exit() builtin: the latter is only
   injected by the 'site' module and is absent under 'python -S' or in
   embedded interpreters.

   Raises SystemExit.
   """
   import sys
   if msg:
     # Parenthesized single-argument form prints the same thing on Python 2.
     print(msg)
   sys.exit(1)

 def time_str():
   """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
   timestamp_format = '%Y-%m-%d %H:%M:%S'
   return time.strftime(timestamp_format)

 def timedelta_to_seconds(td):
   """Return the duration of the datetime.timedelta TD as float seconds."""
   whole_seconds = float(td.seconds)
   fractional_seconds = float(td.microseconds) / (10**6)
   seconds_from_days = td.days * 24 * 60 * 60
   return whole_seconds + fractional_seconds + seconds_from_days

 def run_cmd(cmd, stdin=None, shell=False, verbose=False):
   if options.verbose:
     if shell:
       printable_cmd = cmd
     else:
       printable_cmd = ' '.join(cmd)
     print 'CMD:', printable_cmd

   if stdin:
     stdin_arg = subprocess.PIPE
   else:
     stdin_arg = None

   p = subprocess.Popen(cmd,
                        stdin=stdin_arg,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        shell=shell)
   stdout,stderr = p.communicate(input=stdin)

   if verbose:
     if (stdout):
       print "STDOUT: [[[\n%s]]]" % ''.join(stdout)
   if (stderr):
     print "STDERR: [[[\n%s]]]" % ''.join(stderr)

   return stdout, stderr


 _next_unique_basename_count = 0

 def next_unique_basename(prefix):
   """Return '<PREFIX>_<n>', where n is a module-wide counter that is
   incremented on every call (the first call yields 1)."""
   global _next_unique_basename_count
   serial = _next_unique_basename_count + 1
   _next_unique_basename_count = serial
   return '_'.join([prefix, str(serial)])


 si_units = [
     (1000 ** 5, 'P'),
     (1000 ** 4, 'T'),
     (1000 ** 3, 'G'),
     (1000 ** 2, 'M'),
     (1000 ** 1, 'K'),
     (1000 ** 0, ''),
     ]
 def n_label(n):
     """Return N abbreviated with an SI suffix, e.g. 1500000 -> '1M'.

     The amount is truncated to an integer, not rounded.
     (Adapted from hurry.filesize.)
     """
     # Default to the smallest unit, which is what applies when N does not
     # reach any of the factors.
     factor, suffix = si_units[-1]
     for unit_factor, unit_suffix in si_units:
         if n >= unit_factor:
             factor, suffix = unit_factor, unit_suffix
             break

     amount = int(n/factor)

     # A suffix may be a (singular, plural) pair; none of the entries in
     # si_units currently is.
     if isinstance(suffix, tuple):
         singular, multiple = suffix
         suffix = singular if amount == 1 else multiple

     return str(amount) + suffix


 def split_arg_once(l_r, sep):
   """Split L_R at the first occurrence of SEP and return (left, right).

   A half that is missing or empty is returned as None; an empty or None
   L_R yields (None, None).

   Only the first SEP is significant: any further occurrences stay in the
   right half, rather than raising ValueError from tuple unpacking.
   """
   if not l_r:
     return (None, None)
   # partition() returns ('whole', '', '') when SEP is absent.
   l, _, r = l_r.partition(sep)
   return (l or None, r or None)

 RUN_KIND_SEPARATORS=('@', ',', 'x')

 class RunKind:
   """A parsed timings selection of the form B@R,LxS.

   Attributes branch and revision are strings, levels and spread are ints;
   any element that was not given is None.
   """

   def __init__(self, b_r_l_s):
     """Parse the selection string B_R_L_S into its four elements."""
     rev_sep, dims_sep, spread_sep = RUN_KIND_SEPARATORS
     # First separate 'B@R' from 'LxS', then take each half apart.
     b_r, l_s = split_arg_once(b_r_l_s, dims_sep)
     self.branch, self.revision = split_arg_once(b_r, rev_sep)
     levels, spread = split_arg_once(l_s, spread_sep)
     self.levels = int(levels) if levels else levels
     self.spread = int(spread) if spread else spread

   def label(self):
     """Return the selection rebuilt as a string, leaving out unset parts."""
     rev_sep, dims_sep, spread_sep = RUN_KIND_SEPARATORS
     text = ''
     if self.branch:
       text += self.branch
     if self.revision:
       text += rev_sep + self.revision
     if self.levels or self.spread:
       text += dims_sep
       if self.levels:
         text += str(self.levels)
       if self.spread:
         text += spread_sep + str(self.spread)
     return text

   def args(self):
     """Return (branch, revision, levels, spread) as a tuple."""
     return (self.branch, self.revision, self.levels, self.spread)


 def parse_timings_selections(db, *args):
   """Parse each B@R,LxS string in ARGS and return a list of RunKind objects.

   The revision keywords are expanded using the revisions on record in DB:
   - 'each' yields one RunKind per recorded revision;
   - 'last' / 'last<N>' yields one RunKind for each of the last 10 / N
     recorded revisions ('last0' yields none).
   A revision that merely starts with 'last' but has no numeric suffix
   (e.g. 'lastgood') is kept as an ordinary revision label.
   """
   run_kinds = []

   for arg in args:
     run_kind = RunKind(arg)
     revision = run_kind.revision

     # Determine how many recorded revisions the keyword asks for.
     # expand is False for a plain (non-keyword) revision; limit None means
     # "all recorded revisions".
     expand = False
     limit = None
     if revision == 'each':
       expand = True
     elif revision and revision.startswith('last'):
       count_str = revision[4:]
       if not count_str:
         expand = True
         limit = 10
       elif count_str.isdigit():
         expand = True
         limit = int(count_str)

     if not expand:
       run_kinds.append(run_kind)
       continue

     # Query without a revision constraint to learn which revisions exist.
     run_kind.revision = None
     revisions = TimingQuery(db, run_kind).get_sorted_revisions()
     if limit is not None:
       if limit > 0:
         revisions = revisions[-limit:]
       else:
         # revisions[-0:] would be the whole list, not an empty one.
         revisions = []

     for recorded_revision in revisions:
       revision_run_kind = copy(run_kind)
       revision_run_kind.revision = recorded_revision
       run_kinds.append(revision_run_kind)

   return run_kinds

 def parse_one_timing_selection(db, *args):
   """Parse ARGS like parse_timings_selections(), but insist on one result.

   Returns the single RunKind; calls bail() if the selections expand to
   none or to more than one.
   """
   run_kinds = parse_timings_selections(db, *args)
   if len(run_kinds) != 1:
     # Join the argument strings themselves; unpacking them into join()
     # would space out the characters of a single argument and raise
     # TypeError for several.
     bail("I need exactly one timings identifier, not '%s'"
          % (' '.join(args)))
   return run_kinds[0]


 PATHNAME_VALID_CHARS = "-_.,@" + string.ascii_letters + string.digits
 def filesystem_safe_string(s):
   """Return S with every character not in PATHNAME_VALID_CHARS dropped."""
   kept = []
   for char in s:
     if char in PATHNAME_VALID_CHARS:
       kept.append(char)
   return ''.join(kept)

 def do_div(ref, val):
   """Return VAL divided by REF as a float, or 0.0 if REF is zero or None."""
   if not ref:
     return 0.0
   return float(val) / float(ref)

 def do_diff(ref, val):
   """Return VAL minus REF as a float."""
   delta = float(val) - float(ref)
   return delta


 # ------------------------- database -------------------------

 class TimingsDb:
   """Connection to the sqlite database that holds the benchmark timings.

   Attributes:
     db_path -- the path passed to the constructor
     conn    -- the open sqlite3 connection
   """

   def __init__(self, db_path):
     """Open (creating if necessary) the database at DB_PATH and make sure
     the tables exist."""
     self.db_path = db_path
     self.conn = sqlite3.connect(db_path)
     self.ensure_tables_created()

   def ensure_tables_created(self):
     """Create the batch, run_kind, run and timings tables, unless a table
     named 'batch' is already present."""
     c = self.conn.cursor()
     # try/finally so that the cursor is also closed on the early return.
     try:
       c.execute("""SELECT name FROM sqlite_master WHERE type='table' AND
               name='batch'""")
       if c.fetchone():
         # exists
         return

       print('Creating database tables.')
       c.executescript('''
         CREATE TABLE batch (
           batch_id INTEGER PRIMARY KEY AUTOINCREMENT,
           started TEXT,
           ended TEXT
         );

         CREATE TABLE run_kind (
           run_kind_id INTEGER PRIMARY KEY AUTOINCREMENT,
           branch TEXT NOT NULL,
           revision TEXT NOT NULL,
           wc_levels INTEGER,
           wc_spread INTEGER,
           UNIQUE(branch, revision, wc_levels, wc_spread)
         );

         CREATE TABLE run (
           run_id INTEGER PRIMARY KEY AUTOINCREMENT,
           batch_id INTEGER NOT NULL REFERENCES batch(batch_id),
           run_kind_id INTEGER NOT NULL REFERENCES run_kind(run_kind_id),
           started TEXT,
           ended TEXT,
           aborted INTEGER
         );

         CREATE TABLE timings (
           run_id INTEGER NOT NULL REFERENCES run(run_id),
           command TEXT NOT NULL,
           sequence INTEGER,
           timing REAL
         );'''
       )
       self.conn.commit()
     finally:
       c.close()


 class Batch:
   """A row in the 'batch' table, grouping the runs of one invocation."""

   def __init__(self, db):
     """Insert a new batch row into DB, stamped with the current time, and
     remember its row id as self.id."""
     self.db = db
     self.started = time_str()
     cursor = db.conn.cursor()
     cursor.execute("INSERT INTO batch (started) values (?)", (self.started,))
     db.conn.commit()
     self.id = cursor.lastrowid
     cursor.close()

   def done(self):
     """Store the current time as this batch's 'ended' timestamp."""
     connection = self.db.conn
     cursor = connection.cursor()
     update_params = (time_str(), self.id)
     cursor.execute("""
         UPDATE batch
         SET ended = ?
         WHERE batch_id = ?""",
         update_params)
     connection.commit()
     cursor.close()

 class Run:
   """One benchmark run: a row in the 'run' table plus the command timings
   collected in memory until submit_timings() writes them out."""

   def __init__(self, batch, run_kind):
     """Register a new run of RUN_KIND within BATCH.

     The matching run_kind row is looked up and inserted if it does not
     exist yet; then a run row stamped with the current time is inserted.
     """
     self.batch = batch
     connection = self.batch.db.conn
     cursor = connection.cursor()

     cursor.execute("""
         SELECT run_kind_id FROM run_kind
         WHERE branch = ?
           AND revision = ?
           AND wc_levels = ?
           AND wc_spread = ?""",
         run_kind.args())
     found = cursor.fetchone()
     if not found:
       cursor.execute("""
           INSERT INTO run_kind (branch, revision, wc_levels, wc_spread)
           VALUES (?, ?, ?, ?)""",
           run_kind.args())
       connection.commit()
       kind_id = cursor.lastrowid
     else:
       kind_id = found[0]

     self.started = time_str()

     cursor.execute("""
         INSERT INTO run
           (batch_id, run_kind_id, started)
         VALUES
           (?, ?, ?)""",
         (self.batch.id, kind_id, self.started))
     connection.commit()
     self.id = cursor.lastrowid
     cursor.close()

     # State of the stopwatch: no command is being timed yet.
     self.tic_at = None
     self.current_command = None
     self.timings = []

   def tic(self, command):
     """Start timing COMMAND, first finishing any timing still in progress.
     Commands listed in IGNORE_COMMANDS are not timed."""
     if command in IGNORE_COMMANDS:
       return
     self.toc()
     self.current_command = command
     self.tic_at = datetime.datetime.now()

   def toc(self):
     """Stop the timing in progress, if any, and remember its duration."""
     if self.current_command and self.tic_at:
       elapsed = datetime.datetime.now() - self.tic_at
       self.remember_timing(self.current_command,
                            timedelta_to_seconds(elapsed))
     self.current_command = None
     self.tic_at = None

   def remember_timing(self, command, seconds):
     """Append the pair (COMMAND, SECONDS) to the in-memory timings."""
     self.timings.append((command, seconds))

   def submit_timings(self):
     """Write all remembered timings to the 'timings' table, numbering them
     from 1 in the order they were remembered."""
     connection = self.batch.db.conn
     cursor = connection.cursor()
     print('submitting...')

     rows = []
     for index, (command, seconds) in enumerate(self.timings):
       rows.append((self.id, command, index + 1, seconds))

     cursor.executemany("""
       INSERT INTO timings
         (run_id, command, sequence, timing)
       VALUES
         (?, ?, ?, ?)""",
       rows)

     connection.commit()
     cursor.close()

   def done(self, aborted=False):
     """Store the current time as the run's 'ended' timestamp, together
     with the ABORTED flag."""
     connection = self.batch.db.conn
     cursor = connection.cursor()
     cursor.execute("""
         UPDATE run
         SET ended = ?, aborted = ?
         WHERE run_id = ?""",
         (time_str(), aborted, self.id))
     connection.commit()
     cursor.close()


 class TimingQuery:
   """Queries over the recorded timings of non-aborted runs, restricted to
   the elements that are set in a RunKind selection.

   All queries run against the DB object given to the constructor.
   """

   def __init__(self, db, run_kind):
     """Prepare queries on DB constrained by the set elements of RUN_KIND."""
     # Keep DB so that the query methods use the database they were given
     # instead of relying on a module-global name 'db'.
     self.db = db
     self.cursor = db.conn.cursor()
     self.constraints = []
     self.values = []
     self.timings = None
     self.FROM_WHERE = """
          FROM batch AS b,
               timings AS t,
               run AS r,
               run_kind as k
          WHERE
               t.run_id = r.run_id
               AND k.run_kind_id = r.run_kind_id
               AND b.batch_id = r.batch_id
               AND r.aborted = 0
          """
     self.append_constraint('k.branch', run_kind.branch)
     self.each_revision = False
     if run_kind.revision == 'each':
       self.each_revision = True
     else:
       self.append_constraint('k.revision', run_kind.revision)
     self.append_constraint('k.wc_levels', run_kind.levels)
     self.append_constraint('k.wc_spread', run_kind.spread)
     self.label = run_kind.label()

   def append_constraint(self, column_name, val):
     """Restrict the queries to rows where COLUMN_NAME equals VAL.
     Does nothing if VAL is None or otherwise false."""
     if val:
       self.constraints.append('AND %s = ?' % column_name)
       self.values.append(val)

   def remove_last_constraint(self):
     """Drop the constraint that was appended most recently."""
     del self.constraints[-1]
     del self.values[-1]

   def _fetch(self, query_parts, values, one=False):
     """Run the query made by joining QUERY_PARTS with spaces, binding
     VALUES; return the first row if ONE is true, else all rows."""
     c = self.db.conn.cursor()
     try:
       c.execute(' '.join(query_parts), values)
       if one:
         return c.fetchone()
       return c.fetchall()
     finally:
       c.close()

   def get_sorted_X(self, x, n=1):
     """Return the distinct values of the column expression X, sorted.

     With N == 1 the result is a flat list of values; otherwise it is a
     list of row tuples (use this when X names several columns).
     """
     query = ['SELECT DISTINCT %s' % x,
              self.FROM_WHERE ]
     query.extend(self.constraints)
     query.append('ORDER BY %s' % x)
     rows = self._fetch(query, self.values)
     if n == 1:
       return [tpl[0] for tpl in rows]
     return rows

   def get_sorted_command_names(self):
     """Return the sorted command names that have timings on record."""
     return self.get_sorted_X('t.command')

   def get_sorted_branches(self):
     """Return the sorted branch labels that have timings on record."""
     return self.get_sorted_X('k.branch')

   def get_sorted_revisions(self):
     """Return the sorted revision labels that have timings on record."""
     return self.get_sorted_X('k.revision')

   def get_sorted_levels_spread(self):
     """Return the sorted (levels, spread) pairs that have timings."""
     return self.get_sorted_X('k.wc_levels,k.wc_spread', n = 2)

   def count_runs_batches(self):
     """Return the row (number of distinct runs, number of distinct
     batches) for the selection."""
     query = ["""SELECT
                   count(DISTINCT r.run_id),
                   count(DISTINCT b.batch_id)""",
              self.FROM_WHERE ]
     query.extend(self.constraints)
     return self._fetch(query, self.values, one=True)

   def get_command_timings(self, command):
     """Return the row (count, min, max, avg) of the timings of COMMAND.

     If COMMAND is None or empty, the timings of all commands are
     aggregated.
     """
     query = ["""SELECT
                   count(t.timing),
                   min(t.timing),
                   max(t.timing),
                   avg(t.timing)""",
              self.FROM_WHERE ]
     # Add the command constraint to copies: the query's own constraints
     # stay untouched, also when COMMAND is false and nothing is added.
     constraints = list(self.constraints)
     values = list(self.values)
     if command:
       constraints.append('AND %s = ?' % 't.command')
       values.append(command)
     query.extend(constraints)
     return self._fetch(query, values, one=True)

   def get_timings(self):
     """Return a dict mapping each command name to its (count, min, max,
     avg) row. The result is computed once and cached."""
     # Compare against None so that an empty result is cached as well.
     if self.timings is not None:
       return self.timings
     self.timings = {}
     for command_name in self.get_sorted_command_names():
       self.timings[command_name] = self.get_command_timings(command_name)
     return self.timings


 # ------------------------------------------------------------ run tests


 def perform_run(batch, run_kind,
                 svn_bin, svnadmin_bin, verbose):
   """Execute one complete benchmark run and record its timings.

   A repository and two working copies are created below a fresh temporary
   directory, and a fixed sequence of svn commands is run against them.
   Every svn invocation is timed under its subcommand name, and the
   duration of the whole sequence is recorded under TOTAL_RUN. The
   temporary directory is removed again afterwards.

   BATCH is the Batch this run belongs to; RUN_KIND supplies the labels as
   well as the levels and spread of the tree to create. SVN_BIN and
   SVNADMIN_BIN are the programs to run. If VERBOSE is true, each svn
   command and its output are printed.

   Returns True if the command sequence did not run to its end, else False.
   """

   run = Run(batch, run_kind)

   def create_tree(in_dir, _levels, _spread):
     # Create directory IN_DIR holding _SPREAD files and, while more than
     # one level remains, _SPREAD subdirectories filled the same way.
     try:
       os.mkdir(in_dir)
     except:
       # NOTE(review): every mkdir failure is ignored here, not only
       # "directory already exists".
       pass

     for i in range(_spread):
       # files
       fn = j(in_dir, next_unique_basename('file'))
       f = open(fn, 'w')
       f.write('This is %s\n' % fn)
       f.close()

       # dirs
       if (_levels > 1):
         dn = j(in_dir, next_unique_basename('dir'))
         create_tree(dn, _levels - 1, _spread)

   def svn(*args):
     # Run SVN_BIN with ARGS and time the call under the name ARGS[0].
     # Returns (stdout, stderr), or (None, None) if starting the program
     # raised OSError.
     name = args[0]

     cmd = [ svn_bin ]
     cmd.extend( list(args) )
     if verbose:
       print 'svn cmd:', ' '.join(cmd)

     # stdin is always None here, so no pipe is opened to the child's stdin.
     stdin = None
     if stdin:
       stdin_arg = subprocess.PIPE
     else:
       stdin_arg = None

     run.tic(name)
     try:
       p = subprocess.Popen(cmd,
                            stdin=stdin_arg,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=False)
       stdout,stderr = p.communicate(input=stdin)
     except OSError:
       stdout = stderr = None
     finally:
       # Stop the clock whether or not the command could be run.
       run.toc()

     if verbose:
       if (stdout):
         print "STDOUT: [[[\n%s]]]" % ''.join(stdout)
       if (stderr):
         print "STDERR: [[[\n%s]]]" % ''.join(stderr)

     return stdout,stderr


   # Shorthands for the svn subcommands used most often below.
   def add(*args):
     return svn('add', *args)

   def ci(*args):
     return svn('commit', '-mm', *args)

   def up(*args):
     return svn('update', *args)

   def st(*args):
     return svn('status', *args)

   def info(*args):
     return svn('info', *args)

   # The lowercase letters 'a' through 'z'.
   _chars = [chr(x) for x in range(ord('a'), ord('z') +1)]

   def randstr(len=8):
     # Return a string of LEN random lowercase letters.
     return ''.join( [random.choice(_chars) for i in range(len)] )

   def _copy(path):
     dest = next_unique_basename(path + '_copied')
     svn('copy', path, dest)

   def _move(path):
     dest = path + '_moved'
     svn('move', path, dest)

   def _propmod(path):
     # Change and/or delete an existing property of PATH, if it has any.
     # NOTE(review): so is None when svn() could not start the program, in
     # which case so.strip() raises AttributeError.
     so, se = svn('proplist', path)
     # Skip the first line of the proplist output; the remaining lines are
     # taken to be property names.
     propnames = [line.strip() for line in so.strip().split('\n')[1:]]

     # modify?
     if len(propnames):
       svn('ps', propnames[len(propnames) / 2], randstr(), path)

     # del?
     if len(propnames) > 1:
       svn('propdel', propnames[len(propnames) / 2], path)

   def _propadd(path):
     # set a new one.
     svn('propset', randstr(), randstr(), path)

   def _mod(path):
     # Modify PATH: a directory gets a property change, a file gets a
     # random line appended.
     if os.path.isdir(path):
       _propmod(path)
       return

     f = open(path, 'a')
     f.write('\n%s\n' % randstr())
     f.close()

   def _add(path):
     # Add a new directory or a new file below PATH, chosen at random.
     # If PATH is a file, it is modified instead.
     if os.path.isfile(path):
       return _mod(path)

     if random.choice((True, False)):
       # create a dir
       svn('mkdir', j(path, next_unique_basename('new_dir')))
     else:
       # create a file
       new_path = j(path, next_unique_basename('new_file'))
       f = open(new_path, 'w')
       f.write(randstr())
       f.close()
       svn('add', new_path)

   def _del(path):
     svn('delete', path)

   # The modifications modify_tree() chooses from; _copy, _move and _del
   # are defined above but not part of the selection.
   _mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del)

   def modify_tree(in_dir, fraction):
     # Apply a randomly chosen modification to about FRACTION of the
     # children of IN_DIR (names starting with '.' are skipped), then
     # recurse into the child directories.
     child_names = os.listdir(in_dir)
     for child_name in child_names:
       if child_name[0] == '.':
         continue
       if random.random() < fraction:
         path = j(in_dir, child_name)
         random.choice(_mod_funcs)(path)

     # The recursion uses the listing taken before the modifications.
     for child_name in child_names:
       if child_name[0] == '.': continue
       path = j(in_dir, child_name)
       if os.path.isdir(path):
         modify_tree(path, fraction)

   def propadd_tree(in_dir, fraction):
     # Set a new random property on about FRACTION of the items below
     # IN_DIR, recursively (names starting with '.' are skipped).
     for child_name in os.listdir(in_dir):
       if child_name[0] == '.': continue
       path = j(in_dir, child_name)
       if random.random() < fraction:
         _propadd(path)
       if os.path.isdir(path):
         propadd_tree(path, fraction)


   def rmtree_onerror(func, path, exc_info):
     """Error handler for ``shutil.rmtree``.

     If the error is due to an access error (read only file)
     it attempts to add write permission and then retries.

     If the error is for another reason it re-raises the error.

     Usage : ``shutil.rmtree(path, onerror=onerror)``
     """
     if not os.access(path, os.W_OK):
       # Is the error an access error ?
       os.chmod(path, stat.S_IWUSR)
       func(path)
     else:
       # NOTE(review): a bare raise relies on an exception being handled
       # at the time shutil.rmtree calls this handler -- confirm.
       raise

   base = tempfile.mkdtemp()

   # ensure identical modifications for every run
   random.seed(0)

   # Stays True unless the command sequence below runs to its end.
   aborted = True

   try:
     repos = j(base, 'repos')
     repos = repos.replace('\\', '/')
     wc = j(base, 'wc')
     wc2 = j(base, 'wc2')

     # Build a file:// URL; a path without a leading '/' gets a third
     # slash in front of it.
     if repos.startswith('/'):
       file_url = 'file://%s' % repos
     else:
       file_url = 'file:///%s' % repos

     print '\nRunning svn benchmark in', base
     print 'dir levels: %s; new files and dirs per leaf: %s' %(
           run_kind.levels, run_kind.spread)

     started = datetime.datetime.now()

     try:
       # Create the repository and check out an (empty) working copy.
       run_cmd([svnadmin_bin, 'create', repos])
       svn('checkout', file_url, wc)

       # Create, add and commit the tree, then add properties to it.
       trunk = j(wc, 'trunk')
       create_tree(trunk, run_kind.levels, run_kind.spread)
       add(trunk)
       st(wc)
       ci(wc)
       up(wc)
       propadd_tree(trunk, 0.05)
       ci(wc)
       up(wc)
       st(wc)
       info('-R', wc)

       trunk_url = file_url + '/trunk'
       branch_url = file_url + '/branch'

       # Copy trunk to branch inside the repository.
       svn('copy', '-mm', trunk_url, branch_url)
       st(wc)

       up(wc)
       st(wc)
       info('-R', wc)

       # Check out trunk into a second working copy and modify it there.
       svn('checkout', trunk_url, wc2)
       st(wc2)
       modify_tree(wc2, 0.5)
       st(wc2)
       ci(wc2)
       up(wc2)
       up(wc)

       # Switch the second working copy to the branch and modify that.
       svn('switch', branch_url, wc2)
       modify_tree(wc2, 0.5)
       st(wc2)
       info('-R', wc2)
       ci(wc2)
       up(wc2)
       up(wc)

       # Modify trunk in the first working copy.
       modify_tree(trunk, 0.5)
       st(wc)
       ci(wc)
       up(wc2)
       up(wc)

       # Merge trunk into the branch working copy and resolve conflicts.
       svn('merge', '--accept=postpone', trunk_url, wc2)
       st(wc2)
       info('-R', wc2)
       svn('resolve', '--accept=mine-conflict', wc2)
       st(wc2)
       svn('resolved', '-R', wc2)
       st(wc2)
       info('-R', wc2)
       ci(wc2)
       up(wc2)
       up(wc)

       # Merge the branch back into trunk and resolve conflicts.
       svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk)
       st(wc)
       svn('resolve', '--accept=mine-conflict', wc)
       st(wc)
       svn('resolved', '-R', wc)
       st(wc)
       ci(wc)
       up(wc2)
       up(wc)

       # Delete the branch.
       svn('delete', j(wc, 'branch'))
       ci(wc)
       up(wc)

       aborted = False

     finally:
       # Record the total duration even if the sequence was cut short.
       stopped = datetime.datetime.now()
       print '\nDone with svn benchmark in', (stopped - started)

       run.remember_timing(TOTAL_RUN,
                         timedelta_to_seconds(stopped - started))
   finally:
     # Store the outcome and the timings, then remove the temporary files.
     run.done(aborted)
     run.submit_timings()
     shutil.rmtree(base, onerror=rmtree_onerror)

   return aborted


 # ---------------------------------------------------------------------


 def cmdline_run(db, options, run_kind_str, N=1):
   run_kind = parse_one_timing_selection(db, run_kind_str)

   N = int(N)

   print 'Hi, going to run a Subversion benchmark series of %d runs...' % N
   print 'Label is %s' % run_kind.label()

   # can we run the svn binaries?
   svn_bin = j(options.svn_bin_dir, 'svn')
   svnadmin_bin = j(options.svn_bin_dir, 'svnadmin')

   for b in (svn_bin, svnadmin_bin):
     so,se = run_cmd([b, '--version'])
     if not so:
       bail("Can't run %s" % b)

     print ', '.join([s.strip() for s in so.split('\n')[:2]])

   batch = Batch(db)

   for i in range(N):
     print 'Run %d of %d' % (i + 1, N)
     perform_run(batch, run_kind,
                 svn_bin, svnadmin_bin, options.verbose)

   batch.done()


 def cmdline_list(db, options, *args):
   run_kinds = parse_timings_selections(db, *args)

   for run_kind in run_kinds:

     constraints = []
     def add_if_not_none(name, val):
       if val:
         constraints.append('  %s = %s' % (name, val))
     add_if_not_none('branch', run_kind.branch)
     add_if_not_none('revision', run_kind.revision)
     add_if_not_none('levels', run_kind.levels)
     add_if_not_none('spread', run_kind.spread)
     if constraints:
       print 'For\n', '\n'.join(constraints)
     print 'I found:'

     d = TimingQuery(db, run_kind)

     cmd_names = d.get_sorted_command_names()
     if cmd_names:
       print '\n%d command names:\n ' % len(cmd_names), '\n  '.join(cmd_names)

     branches = d.get_sorted_branches()
     if branches and (len(branches) > 1 or branches[0] != run_kind.branch):
       print '\n%d branches:\n ' % len(branches), '\n  '.join(branches)

     revisions = d.get_sorted_revisions()
     if revisions and (len(revisions) > 1 or revisions[0] != run_kind.revision):
       print '\n%d revisions:\n ' % len(revisions), '\n  '.join(revisions)

     levels_spread = d.get_sorted_levels_spread()
     if levels_spread and (
          len(levels_spread) > 1
          or levels_spread[0] != (run_kind.levels, run_kind.spread)):
       print '\n%d kinds of levels x spread:\n ' % len(levels_spread), '\n  '.join(
               [ ('%dx%d' % (l, s)) for l,s in levels_spread ])

     print "\n%d runs in %d batches.\n" % (d.count_runs_batches())


 def cmdline_show(db, options, *run_kind_strings):
   run_kinds = parse_timings_selections(db, *run_kind_strings)
   for run_kind in run_kinds:
     q = TimingQuery(db, run_kind)
     timings = q.get_timings()

     s = []
     s.append('Timings for %s' % run_kind.label())
     s.append('   N    min     max     avg   operation  (unit is seconds)')

     for command_name in q.get_sorted_command_names():
       if options.command_names and command_name not in options.command_names:
         continue
       n, tmin, tmax, tavg = timings[command_name]

       s.append('%4s %7.2f %7.2f %7.2f  %s' % (
                  n_label(n),
                  tmin,
                  tmax,
                  tavg,
                  command_name))

     print '\n'.join(s)


 def cmdline_compare(db, options, *args):
   run_kinds = parse_timings_selections(db, *args)
   if len(run_kinds) < 2:
     bail("Need at least two sets of timings to compare.")


   left_kind = run_kinds[0]
   leftq = TimingQuery(db, left_kind)
   left = leftq.get_timings()
   if not left:
     bail("No timings for %s" % left_kind.label())

   for run_kind_idx in range(1, len(run_kinds)):
     right_kind = run_kinds[run_kind_idx]

     rightq = TimingQuery(db, right_kind)
     right = rightq.get_timings()
     if not right:
       print "No timings for %s" % right_kind.label()
       continue

     label = 'Compare %s to %s' % (right_kind.label(), left_kind.label())

     s = [label]

     verbose = options.verbose
     if not verbose:
       s.append('       N        avg         operation')
     else:
       s.append('       N        min              max              avg         operation')

     command_names = [name for name in leftq.get_sorted_command_names()
                      if name in right]
     if options.command_names:
       command_names = [name for name in command_names
                        if name in options.command_names]

     for command_name in command_names:
       left_N, left_min, left_max, left_avg = left[command_name]
       right_N, right_min, right_max, right_avg = right[command_name]

       N_str = '%s/%s' % (n_label(left_N), n_label(right_N))
       avg_str = '%7.2f|%+7.3f' % (do_div(left_avg, right_avg),
                                   do_diff(left_avg, right_avg))

       if not verbose:
         s.append('%9s %-16s  %s' % (N_str, avg_str, command_name))
       else:
         min_str = '%7.2f|%+7.3f' % (do_div(left_min, right_min),
                                     do_diff(left_min, right_min))
         max_str = '%7.2f|%+7.3f' % (do_div(left_max, right_max),
                                     do_diff(left_max, right_max))

         s.append('%9s %-16s %-16s %-16s  %s' % (N_str, min_str, max_str, avg_str,
                                             command_name))

     s.extend([
       '(legend: "1.23|+0.45" means: slower by factor 1.23 and by 0.45 seconds;',
       ' factor < 1 and seconds < 0 means \'%s\' is faster.'
       % right_kind.label(),
       ' "2/3" means: \'%s\' has 2 timings on record, the other has 3.)'
       % left_kind.label()
       ])


     print '\n'.join(s)


 # ------------------------------------------------------- charts

 def cmdline_chart_compare(db, options, *args):
   import matplotlib
   matplotlib.use('Agg')
   import numpy as np
   import matplotlib.pylab as plt

   labels = []
   timing_sets = []
   command_names = None

   run_kinds = parse_timings_selections(db, *args)

   # iterate the timings selections and accumulate data
   for run_kind in run_kinds:
     query = TimingQuery(db, run_kind)
     timings = query.get_timings()
     if not timings:
       print "No timings for %s" % run_kind.label()
       continue
     labels.append(run_kind.label())
     timing_sets.append(timings)

     # it only makes sense to compare those commands that have timings
     # in the first selection, because that is the one everything else
     # is compared to. Remember the first selection's command names.
     if not command_names:
       command_names = query.get_sorted_command_names()


   if len(timing_sets) < 2:
     bail("Not enough timings")

   if options.command_names:
     command_names = [name for name in command_names
                      if name in options.command_names]

   chart_path = options.chart_path
   if not chart_path:
     chart_path = 'compare_' + '_'.join(
       [ filesystem_safe_string(l) for l in labels ]
       ) + '.svg'

   N = len(command_names)
   M = len(timing_sets) - 1
   if M < 2:
     M = 2

   group_positions = np.arange(N)  # the y locations for the groups
   dist = 1. / (1. + M)
   height = (1. - dist) / M     # the height of the bars

   fig = plt.figure(figsize=(12, 5 + 0.2*N*M))
   plot1 = fig.add_subplot(121)
   plot2 = fig.add_subplot(122)

   left = timing_sets[0]

   # Iterate timing sets. Each loop produces one bar for each command name
   # group.
   for label_i,label in enumerate(labels[1:],1):
     right = timing_sets[label_i]
     if not right:
       continue

     for cmd_i, command_name in enumerate(command_names):
       if command_name not in right:
         #skip
         continue

       left_N, left_min, left_max, left_avg = left[command_name]
       right_N, right_min, right_max, right_avg = right[command_name]

       div_avg = 100. * (do_div(left_avg, right_avg) - 1.0)
       if div_avg <= 0:
         col = '#55dd55'
       else:
         col = '#dd5555'

       diff_val = do_diff(left_avg, right_avg)

       ofs = (dist + height) / 2. + height * (label_i - 1)

       barheight = height * (1.0 - dist)

       y = float(cmd_i) + ofs

       plot1.barh((y, ),
                  (div_avg, ),
                  barheight,
                  color=col, edgecolor='white')
       plot1.text(0., y + height/2.,
                  '%s %+5.1f%%' % (label, div_avg),
                  ha='right', va='center', size='small',
                  rotation=0, family='monospace')

       plot2.barh((y, ),
                  (diff_val, ),
                  barheight,
                  color=col, edgecolor='white')
       plot2.text(0., y + height/2.,
                  '%s %+6.2fs' % (label, diff_val),
                  ha='right', va='center', size='small',
                  rotation=0, family='monospace')


   for p in (plot1, plot2):
     xlim = list(p.get_xlim())
     if xlim[1] < 10.:
       xlim[1] = 10.
     # make sure the zero line is far enough right so that the annotations
     # fit inside the chart. About half the width should suffice.
     if xlim[0] > -xlim[1]:
       xlim[0] = -xlim[1]
     p.set_xlim(*xlim)
     p.set_xticks((0,))
     p.set_yticks(group_positions + (height / 2.))
     p.set_yticklabels(())
     p.set_ylim((len(command_names), 0))
     p.grid()

   plot1.set_xticklabels(('+-0%',), rotation=0)
   plot1.set_title('Average runtime change from %s in %%' % labels[0],
                   size='medium')

   plot2.set_xticklabels(('+-0s',), rotation=0)
   plot2.set_title('Average runtime change from %s in seconds' % labels[0],
                   size='medium')

   margin = 1./(2 + N*M)
   titlemargin = 0
   if options.title:
     titlemargin = margin * 1.5

   fig.subplots_adjust(left=0.005, right=0.995, wspace=0.3, bottom=margin,
                       top=1.0-margin-titlemargin)

   ystep = (1.0 - 2.*margin - titlemargin) / len(command_names)

   for idx,command_name in enumerate(command_names):
     ylabel = '%s\nvs. %.1fs' % (
                      command_name,
                      left[command_name][3])

     ypos=1.0 - margin - titlemargin - ystep/M - ystep * idx
     plt.figtext(0.5, ypos,
                 command_name,
                 ha='center', va='top',
                 size='medium', weight='bold')
     plt.figtext(0.5, ypos - ystep/(M+1),
                 '%s\n= %.2fs' % (
                   labels[0], left[command_name][3]),
                 ha='center', va='top',
                 size='small')

   if options.title:
     plt.figtext(0.5, 1. - titlemargin/2, options.title, ha='center',
                 va='center', weight='bold')

   plt.savefig(chart_path)
   print 'wrote chart file:', chart_path


 # ------------------------------------------------------------ main


 # Custom option formatter, keeping newlines in the description.
 # adapted from:
 # http://groups.google.com/group/comp.lang.python/msg/09f28e26af0699b1
 import textwrap
 class IndentedHelpFormatterWithNL(optparse.IndentedHelpFormatter):
   """Help formatter that keeps the newlines of the description text.

   The description is split at its newline characters and every resulting
   line is wrapped on its own, so line breaks and blank lines written in the
   description survive in the help output.
   """

   def format_description(self, description):
     """Return DESCRIPTION wrapped line by line at the current indentation.

     Each line of DESCRIPTION is filled separately and prefixed with the
     current indentation; the lines are joined with newlines again and a
     trailing newline is appended.  An empty or missing DESCRIPTION yields
     the empty string.
     """
     if not description:
       return ""
     # Never wrap narrower than 11 columns: textwrap.fill() raises ValueError
     # for a width <= 0, which would otherwise happen on very narrow
     # terminals (tiny COLUMNS).  This mirrors the lower bound that optparse
     # applies when formatting its own text paragraphs.
     desc_width = max(self.width - self.current_indent, 11)
     indent = " " * self.current_indent
     formatted_bits = [
       textwrap.fill(bit,
                     desc_width,
                     initial_indent=indent,
                     subsequent_indent=indent)
       for bit in description.split('\n')]
     return "\n".join(formatted_bits) + "\n"

 if __name__ == '__main__':
   parser = optparse.OptionParser(formatter=IndentedHelpFormatterWithNL())
   # -h is automatically added.
   ### should probably expand the help for that. and see about -?
   parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                     help='Verbose operation')
   parser.add_option('-b', '--svn-bin-dir', action='store', dest='svn_bin_dir',
                     default='',
                     help='Specify directory to find Subversion binaries in')
   parser.add_option('-f', '--db-path', action='store', dest='db_path',
                     default='benchmark.db',
                     help='Specify path to SQLite database file')
   parser.add_option('-o', '--chart-path', action='store', dest='chart_path',
                     help='Supply a path for chart output.')
   parser.add_option('-c', '--command-names', action='store',
                     dest='command_names',
                     help='Comma separated list of command names to limit to.')
   parser.add_option('-t', '--title', action='store',
                     dest='title',
                     help='For charts, a title to print in the chart graphics.')

   parser.set_description(__doc__)
   parser.set_usage('')


   options, args = parser.parse_args()

   def usage(msg=None):
     parser.print_help()
     if msg:
       print
       print msg
     bail()

   # there should be at least one arg left: the sub-command
   if not args:
     usage('No command argument supplied.')

   cmd = args[0]
   del args[0]

   db = TimingsDb(options.db_path)

   if cmd == 'run':
     if len(args) < 1 or len(args) > 2:
       usage()
     cmdline_run(db, options, *args)

   elif cmd == 'compare':
     if len(args) < 2:
       usage()
     cmdline_compare(db, options, *args)

   elif cmd == 'list':
     cmdline_list(db, options, *args)

   elif cmd == 'show':
     cmdline_show(db, options, *args)

   elif cmd == 'chart':
     if 'compare'.startswith(args[0]):
       cmdline_chart_compare(db, options, *args[1:])
     else:
       usage()

   else:
     usage('Unknown subcommand argument: %s' % cmd)