| #!/usr/bin/env python |
| # |
| # cvs2svn: ... |
| # |
| |
| import rcsparse |
| import os |
| import sys |
| import sha |
| import re |
| import time |
| import fileinput |
| import string |
| import getopt |
| import statcache |
| import stat |
| |
| from svn import fs, util, delta, repos |
| |
| |
| trunk_rev = re.compile('^[0-9]+\\.[0-9]+$') |
| branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$') |
| vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$') |
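# Examples: trunk_rev matches a trunk revision such as '1.42'; branch_tag
# matches a CVS "magic branch number" such as '1.7.0.2' (the tag for branch
# 1.7.2); vendor_tag matches a vendor-branch revision such as '1.1.1'.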
| |
| DATAFILE = 'cvs2svn-data' |
| REVS_SUFFIX = '.revs' |
| CLEAN_REVS_SUFFIX = '.c-revs' |
| SORTED_REVS_SUFFIX = '.s-revs' |
| TAGS_SUFFIX = '.tags' |
| RESYNC_SUFFIX = '.resync' |
| |
| SVNROOT = 'svnroot' |
| ATTIC = os.sep + 'Attic' |
| |
| COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs |
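# CVS has no changesets; we aggregate file revisions into a single SVN commit
# when they share an author/log digest and fall within this time window.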
| |
| OP_DELETE = 'D' |
| OP_CHANGE = 'C' |
| |
| DIGEST_END_IDX = 9 + (sha.digestsize * 2) |
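# i.e. in a resync line ('%08lx %s %08lx') the digest starts at column 9,
# after the 8-hex-digit timestamp and a space, and runs digestsize*2 chars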
| |
| verbose = 1 |
| |
| |
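# pass 1 runs every ,v file in the repository through this sink, recording
# revision, tag, and resync data into the intermediate files used by the
# later passes.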
| class CollectData(rcsparse.Sink): |
| def __init__(self, cvsroot, log_fname_base): |
| self.cvsroot = cvsroot |
    self.revs = open(log_fname_base + REVS_SUFFIX, 'w')
    self.tags = open(log_fname_base + TAGS_SUFFIX, 'w')
    self.resync = open(log_fname_base + RESYNC_SUFFIX, 'w')
| |
| def set_fname(self, fname): |
| "Prepare to receive data for a new file." |
| self.fname = fname |
| |
| # revision -> [timestamp, author, operation, old-timestamp] |
| self.rev_data = { } |
| self.prev = { } |
| self.branch_names = {} |
| self.taglist = {} |
| self.branchlist = {} |
| |
| def set_branch_name(self, revision, name): |
| self.branch_names[revision] = name |
| |
| def get_branch_name(self, revision): |
    brev = revision[:revision.rindex(".")]
| if not self.branch_names.has_key(brev): |
| return None |
| return self.branch_names[brev] |
| |
| def add_branch_point(self, revision, branch_name): |
| if not self.branchlist.has_key(revision): |
| self.branchlist[revision] = [] |
| self.branchlist[revision].append(branch_name) |
| |
| def add_cvs_branch(self, revision, branch_name): |
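    # branch tags use CVS "magic branch numbers": the tag for branch 1.7.2
    # is stored as '1.7.0.2'. Dropping the '.0' component recovers the real
    # branch number ('1.7.2') and its branch point ('1.7').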
    last_dot = revision.rfind(".")
    branch_rev = revision[:last_dot]
    last2_dot = branch_rev.rfind(".")
    branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
| self.set_branch_name(branch_rev, branch_name) |
| self.add_branch_point(branch_rev[:last2_dot], branch_name) |
| |
| def get_tags(self, revision): |
| if self.taglist.has_key(revision): |
| return self.taglist[revision] |
| else: |
| return [] |
| |
| def get_branches(self, revision): |
| if self.branchlist.has_key(revision): |
| return self.branchlist[revision] |
| else: |
| return [] |
| |
| def define_tag(self, name, revision): |
| self.tags.write('%s %s %s\n' % (name, revision, self.fname)) |
| if branch_tag.match(revision): |
| self.add_cvs_branch(revision, name) |
| elif vendor_tag.match(revision): |
| self.set_branch_name(revision, name) |
| else: |
| if not self.taglist.has_key(revision): |
        self.taglist[revision] = []
| self.taglist[revision].append(name) |
| |
| def define_revision(self, revision, timestamp, author, state, |
| branches, next): |
| ### what else? |
| if state == 'dead': |
| op = OP_DELETE |
| else: |
| op = OP_CHANGE |
| |
| # store the rev_data as a list in case we have to jigger the timestamp |
| self.rev_data[revision] = [int(timestamp), author, op, None] |
| |
| # record the previous revision for sanity checking later |
| if trunk_rev.match(revision): |
| self.prev[revision] = next |
| elif next: |
| self.prev[next] = revision |
| for b in branches: |
| self.prev[b] = revision |
| |
| def tree_completed(self): |
| "The revision tree has been parsed. Analyze it for consistency." |
| |
    # Our algorithm depends upon the timestamps on the revisions occurring
| # monotonically over time. That is, we want to see rev 1.34 occur in |
| # time before rev 1.35. If we inserted 1.35 *first* (due to the time- |
| # sorting), and then tried to insert 1.34, we'd be screwed. |
| |
| # to perform the analysis, we'll simply visit all of the 'previous' |
| # links that we have recorded and validate that the timestamp on the |
| # previous revision is before the specified revision |
| |
| # if we have to resync some nodes, then we restart the scan. just keep |
| # looping as long as we need to restart. |
| while 1: |
| for current, prev in self.prev.items(): |
| if not prev: |
| # no previous revision exists (i.e. the initial revision) |
| continue |
| t_c = self.rev_data[current][0] |
| t_p = self.rev_data[prev][0] |
| if t_p >= t_c: |
| # the previous revision occurred later than the current revision. |
| # shove the previous revision back in time (and any before it that |
| # may need to shift). |
| while t_p >= t_c: |
| self.rev_data[prev][0] = t_c - 1 # new timestamp |
| self.rev_data[prev][3] = t_p # old timestamp |
| |
| print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \ |
| % (relative_name(self.cvsroot, self.fname), |
| prev, time.ctime(t_p), time.ctime(t_c - 1)) |
| |
| current = prev |
| prev = self.prev[current] |
| if not prev: |
| break |
            t_c = t_c - 1  # the value just stored in self.rev_data[current][0]
| t_p = self.rev_data[prev][0] |
| |
| # break from the for-loop |
| break |
| else: |
| # finished the for-loop (no resyncing was performed) |
| return |
| |
| def set_revision_info(self, revision, log, text): |
| timestamp, author, op, old_ts = self.rev_data[revision] |
| digest = sha.new(log + '\0' + author).hexdigest() |
| if old_ts: |
      # the timestamp on this revision was changed. log it for later
      # resynchronization of other files' revisions that occurred for
      # this time and log message.
| self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp)) |
| |
| branch_name = self.get_branch_name(revision) |
| |
| write_revs_line(self.revs, timestamp, digest, op, revision, self.fname, |
| branch_name, self.get_tags(revision), self.get_branches(revision)) |
| |
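# helpers mapping CVS names onto the repository layout:
#   trunk      -> <trunk_base>/
#   branch 'b' -> <branches_base>/b/
#   tag 't'    -> <tags_base>/t/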
| def branch_path(ctx, branch_name = None): |
| if branch_name: |
| return ctx.branches_base + '/' + branch_name + '/' |
| else: |
| return ctx.trunk_base + '/' |
| |
| def get_tag_path(ctx, tag_name): |
| return ctx.tags_base + '/' + tag_name + '/' |
| |
def relative_name(cvsroot, fname):
  l = len(cvsroot)
  if fname[:l] == cvsroot:
    if fname[l] == '/':
      return fname[l+1:]
    return fname[l:]
  sys.stderr.write('relative_name: "%s" is not relative to "%s"\n'
                   % (fname, cvsroot))
  sys.exit(1)
| |
def make_path(fs, root, repos_path, f_pool):
  # repos_path is a repository path: it always uses '/', never os.sep
  dirname = os.path.dirname(repos_path)
  if dirname != '/':
    # get the components of the path (skipping the leading '/')
    parts = string.split(dirname[1:], '/')
    for i in range(1, len(parts) + 1):
      # reassemble the pieces, adding a leading slash
      parent_dir = '/' + string.join(parts[:i], '/')
      if fs.check_path(root, parent_dir, f_pool) == util.svn_node_none:
        print ' making dir:', parent_dir
        fs.make_dir(root, parent_dir, f_pool)
| |
| def visit_file(arg, dirname, files): |
| cd, p, stats = arg |
| for fname in files: |
| if fname[-2:] != ',v': |
| continue |
| pathname = os.path.join(dirname, fname) |
| if dirname[-6:] == ATTIC: |
| # drop the 'Attic' portion from the pathname |
| ### we should record this so we can easily insert it back in |
| cd.set_fname(os.path.join(dirname[:-6], fname)) |
| else: |
| cd.set_fname(pathname) |
| if verbose: |
| print pathname |
| p.parse(open(pathname), cd) |
| stats[0] = stats[0] + 1 |
| |
| class RevInfoParser(rcsparse.Sink): |
| def __init__(self): |
| self.authors = { } # revision -> author |
| self.logs = { } # revision -> log message |
| |
| def define_revision(self, revision, timestamp, author, state, |
| branches, next): |
| self.authors[revision] = author |
| |
| def set_revision_info(self, revision, log, text): |
| self.logs[revision] = log |
| |
| def parse_cvs_file(self, rcs_pathname): |
| try: |
| rcsfile = open(rcs_pathname, 'r') |
| except: |
| try: |
| dirname, fname = os.path.split(rcs_pathname) |
| rcs_pathname = os.path.join(dirname, "Attic", fname) |
| rcsfile = open(rcs_pathname, 'r') |
| except: |
| ### should use a better error |
| raise RuntimeError, ('error: %s appeared to be under CVS control, ' |
| 'but the RCS file is inaccessible.' |
| % rcs_pathname) |
| |
| rcsparse.Parser().parse(rcsfile, self) |
| |
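# An RCS file stores the head revision as full text; every other revision is
# reached by applying ed-style diffs along the delta chain ("d10 2" deletes
# 2 lines starting at line 10; "a5 3" inserts the following 3 lines after
# line 5). BuildRevision walks that chain from the head down to self.rev.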
class BuildRevision(rcsparse.Sink):
  d_command = re.compile('^d(\\d+)\\s(\\d+)')
  a_command = re.compile('^a(\\d+)\\s(\\d+)')

  def __init__(self, rev, get_metadata=0):
    self.rev = rev
    self.get_metadata = get_metadata
    self.result = None
    self.prev_delta = { }
| |
| def define_revision(self, revision, timestamp, author, state, |
| branches, next): |
| for branch in branches: |
| self.prev_delta[branch] = revision |
| if next: |
| self.prev_delta[next] = revision |
| if self.get_metadata and revision == self.rev: |
| self.author = author |
| |
| def tree_completed(self): |
| path = [ ] |
| revision = self.rev |
| while revision: |
| path.append(revision) |
| revision = self.prev_delta.get(revision) |
| path.reverse() |
| self.collect = path |
| |
| def set_revision_info(self, revision, log, text): |
| if not self.collect: |
| # nothing more to do |
| ### would be nice to halt the file parsing... |
| return |
| |
| # NOTE: we assume that the deltas appear in the proper order within |
| # the RCS file, for streaming application. Thus, our max size is the |
| # largest revision of all involved (rather than the revision plus all |
| # diff entries). |
    if revision != self.collect[0]:
      # not something we are interested in
      return
    # consume this revision; the next delta we want becomes collect[0]
    del self.collect[0]
| |
| if self.get_metadata and revision == self.rev: |
| self.log = log |
| |
| if self.result is None: |
| self.result = string.split(text, '\n') |
| else: |
      adjust = 0
      add_lines_remaining = 0
      diffs = string.split(text, '\n')
| |
| for command in diffs: |
| if add_lines_remaining > 0: |
| # Insertion lines from a prior "a" command |
| self.result.insert(start_line + adjust, command) |
| add_lines_remaining = add_lines_remaining - 1 |
| adjust = adjust + 1 |
| else: |
| dmatch = self.d_command.match(command) |
| amatch = self.a_command.match(command) |
| if dmatch: |
| # "d" - Delete command |
| start_line = string.atoi(dmatch.group(1)) |
| count = string.atoi(dmatch.group(2)) |
| begin = start_line + adjust - 1 |
| del self.result[begin:begin + count] |
| adjust = adjust - count |
| elif amatch: |
| # "a" - Add command |
| start_line = string.atoi(amatch.group(1)) |
| count = string.atoi(amatch.group(2)) |
| add_lines_remaining = count |
| else: |
| raise RuntimeError, 'Error parsing diff commands' |
| |
| class Commit: |
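  # One logical changeset: the CVS file revisions that share an author/log
  # digest and were checked in close together in time (see pass4).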
| def __init__(self): |
| self.files = { } |
| self.changes = [ ] |
| self.deletes = [ ] |
| self.t_min = 1<<30 |
| self.t_max = 0 |
| |
| def has_file(self, fname): |
| return self.files.has_key(fname) |
| |
| def add(self, t, op, file, rev, branch_name, tags, branches): |
| # record the time range of this commit |
| if t < self.t_min: |
| self.t_min = t |
| if t > self.t_max: |
| self.t_max = t |
| |
| if op == OP_CHANGE: |
| self.changes.append((file, rev, branch_name, tags, branches)) |
| else: |
| # OP_DELETE |
| self.deletes.append((file, rev, branch_name, tags, branches)) |
| self.files[file] = 1 |
| |
| def get_metadata(self, pool): |
| # by definition, the author and log message must be the same for all |
| # items that went into this commit. therefore, just grab any item from |
| # our record of changes/deletes. |
| if self.changes: |
| file, rev, br, tags, branches = self.changes[0] |
| else: |
| # there better be one... |
| file, rev, br, tags, branches = self.deletes[0] |
| |
| # now, fetch the author/log from the ,v file |
| rip = RevInfoParser() |
| rip.parse_cvs_file(file) |
| author = rip.authors[rev] |
| log = rip.logs[rev] |
| |
| # format the date properly |
| a_t = util.apr_time_ansi_put(self.t_max)[1] |
| date = util.svn_time_to_cstring(a_t, pool) |
| |
| return author, log, date |
| |
| def commit(self, t_fs, ctx): |
| # commit this transaction |
| print 'committing: %s, over %d seconds' % (time.ctime(self.t_min), |
| self.t_max - self.t_min) |
| |
| if ctx.dry_run: |
| for f, r, br, tags, branches in self.changes: |
| # compute a repository path. ensure we have a leading "/" and drop |
| # the ,v from the file name |
| repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2]) |
| print ' changing %s : %s' % (r, repos_path) |
| for f, r, br, tags, branches in self.deletes: |
| # compute a repository path. ensure we have a leading "/" and drop |
| # the ,v from the file name |
| repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2]) |
| print ' deleting %s : %s' % (r, repos_path) |
| print ' (skipped; dry run enabled)' |
| return |
| |
| # create a pool for the entire commit |
| c_pool = util.svn_pool_create(ctx.pool) |
| |
| rev = fs.youngest_rev(t_fs, c_pool) |
| txn = fs.begin_txn(t_fs, rev, c_pool) |
| root = fs.txn_root(txn, c_pool) |
| |
| lastcommit = (None, None) |
| |
| do_copies = [ ] |
| |
| # create a pool for each file; it will be cleared on each iteration |
| f_pool = util.svn_pool_create(c_pool) |
| |
| for f, r, br, tags, branches in self.changes: |
| # compute a repository path. ensure we have a leading "/" and drop |
| # the ,v from the file name |
| rel_name = relative_name(ctx.cvsroot, f[:-2]) |
| repos_path = branch_path(ctx, br) + rel_name |
| #print 'DEBUG:', repos_path |
| |
| print ' changing %s : %s' % (r, repos_path) |
| |
| make_path(fs, root, repos_path, f_pool) |
| |
| if fs.check_path(root, repos_path, f_pool) == util.svn_node_none: |
| created_file = 1 |
| fs.make_file(root, repos_path, f_pool) |
| else: |
| created_file = 0 |
| |
| handler, baton = fs.apply_textdelta(root, repos_path, None, None, f_pool) |
| |
| # figure out the real file path for "co" |
| try: |
| f_st = statcache.stat(f) |
| except os.error: |
| dirname, fname = os.path.split(f) |
| f = os.path.join(dirname, 'Attic', fname) |
| f_st = statcache.stat(f) |
| |
| pipe = os.popen('co -q -p%s \'%s\'' % (r, f), 'r', 102400) |
| |
| # if we just made the file, we can send it in one big hunk, rather |
| # than streaming it in. |
| ### we should watch out for file sizes here; we don't want to yank |
| ### in HUGE files... |
| if created_file: |
| delta.svn_txdelta_send_string(pipe.read(), handler, baton, f_pool) |
| if f_st[0] & stat.S_IXUSR: |
          fs.change_node_prop(root, repos_path, "svn:executable", "", f_pool)
| else: |
| # open an SVN stream onto the pipe |
| stream2 = util.svn_stream_from_aprfile(pipe, f_pool) |
| |
| # Get the current file contents from the repo, or, if we have |
| # multiple CVS revisions to the same file being done in this |
| # single commit, then get the contents of the previous |
| # revision from co, or else the delta won't be correct because |
| # the contents in the repo won't have changed yet. |
| if repos_path == lastcommit[0]: |
| infile2 = os.popen("co -q -p%s \'%s\'" % (lastcommit[1], f), "r", 102400) |
| stream1 = util.svn_stream_from_aprfile(infile2, f_pool) |
| else: |
| stream1 = fs.file_contents(root, repos_path, f_pool) |
| |
| txstream = delta.svn_txdelta(stream1, stream2, f_pool) |
| delta.svn_txdelta_send_txstream(txstream, handler, baton, f_pool) |
| |
| # shut down the previous-rev pipe, if we opened it |
| infile2 = None |
| |
| # shut down the current-rev pipe |
| pipe.close() |
| |
| # wipe the pool. this will get rid of the pipe streams and the delta |
| # stream, and anything the FS may have done. |
| util.svn_pool_clear(f_pool) |
| |
| # remember what we just did, for the next iteration |
| lastcommit = (repos_path, r) |
| |
| for to_tag in tags: |
| to_tag_path = get_tag_path(ctx, to_tag) + rel_name |
| do_copies.append((repos_path, to_tag_path, 1)) |
| for to_branch in branches: |
| to_branch_path = branch_path(ctx, to_branch) + rel_name |
| do_copies.append((repos_path, to_branch_path, 2)) |
| |
| for f, r, br, tags, branches in self.deletes: |
| # compute a repository path. ensure we have a leading "/" and drop |
| # the ,v from the file name |
| rel_name = relative_name(ctx.cvsroot, f[:-2]) |
| repos_path = branch_path(ctx, br) + rel_name |
| |
| print ' deleting %s : %s' % (r, repos_path) |
| |
| # If the file was initially added on a branch, the first mainline |
| # revision will be marked dead, and thus, attempts to delete it will |
| # fail, since it doesn't really exist. |
| if r != '1.1': |
| ### need to discriminate between OS paths and FS paths |
| fs.delete(root, repos_path, f_pool) |
| |
| for to_branch in branches: |
| to_branch_path = branch_path(ctx, to_branch) + rel_name |
| print "file", f, "created on branch", to_branch, "rev", r, "path", to_branch_path |
| |
| # wipe the pool, in case the delete loads it up |
| util.svn_pool_clear(f_pool) |
| |
| # get the metadata for this commit |
| author, log, date = self.get_metadata(c_pool) |
| |
| # convert locale encoded strings to unicode objects |
| l = unicode(log, ctx.encoding) |
| a = unicode(author, ctx.encoding) |
| |
| # put UTF-8 encoded unicode-"strings" into svn filesystem |
| fs.change_txn_prop(txn, 'svn:author', a.encode('utf8'), c_pool) |
| fs.change_txn_prop(txn, 'svn:log', l.encode('utf8'), c_pool) |
| |
| conflicts, new_rev = fs.commit_txn(txn) |
| |
| # set the time to the proper (past) time |
| fs.change_rev_prop(t_fs, new_rev, 'svn:date', date, c_pool) |
| |
| ### how come conflicts is a newline? |
| if conflicts != '\n': |
| print ' CONFLICTS:', `conflicts` |
| print ' new revision:', new_rev |
| |
| if len(do_copies) > 0: |
| # make a new transaction for the tags |
| rev = fs.youngest_rev(t_fs, c_pool) |
| txn = fs.begin_txn(t_fs, rev, c_pool) |
| root = fs.txn_root(txn, c_pool) |
| |
| for c_from, c_to, c_type in do_copies: |
| print "copying", c_from, "to", c_to |
| |
        t_root = fs.revision_root(t_fs, rev, f_pool)
| make_path(fs, root, c_to, f_pool) |
| fs.copy(t_root, c_from, root, c_to, f_pool) |
| |
| # clear the pool after each copy |
| util.svn_pool_clear(f_pool) |
| |
| log_msg = "%d copies to tags/branches\n" % (len(do_copies)) |
| fs.change_txn_prop(txn, 'svn:author', "cvs2svn", c_pool) |
| fs.change_txn_prop(txn, 'svn:log', log_msg, c_pool) |
| |
| conflicts, new_rev = fs.commit_txn(txn) |
| if conflicts != '\n': |
| print ' CONFLICTS:', `conflicts` |
| print ' new revision:', new_rev |
| |
| # FIXME: we don't set a date here |
| |
| # done with the commit and file pools |
| util.svn_pool_destroy(c_pool) |
| |
| def read_resync(fname): |
| "Read the .resync file into memory." |
| |
| ### note that we assume that we can hold the entire resync file in |
| ### memory. really large repositories with whacky timestamps could |
| ### bust this assumption. should that ever happen, then it is possible |
| ### to split the resync file into pieces and make multiple passes, |
| ### using each piece. |
| |
| # |
| # A digest maps to a sequence of lists which specify a lower and upper |
| # time bound for matching up the commit. We keep a sequence of these |
| # because a number of checkins with the same log message (e.g. an empty |
| # log message) could need to be remapped. We also make them a list because |
| # we will dynamically expand the lower/upper bound as we find commits |
| # that fall into a particular msg and time range. |
| # |
| # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ] |
| # |
| resync = { } |
| |
| for line in fileinput.FileInput(fname): |
| t1 = int(line[:8], 16) |
| digest = line[9:DIGEST_END_IDX] |
| t2 = int(line[DIGEST_END_IDX+1:], 16) |
| t1_l = t1 - COMMIT_THRESHOLD/2 |
| t1_u = t1 + COMMIT_THRESHOLD/2 |
| if resync.has_key(digest): |
| resync[digest].append([t1_l, t1_u, t2]) |
| else: |
| resync[digest] = [ [t1_l, t1_u, t2] ] |
| return resync |
| |
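# a .revs line, as produced by write_revs_line() below, is laid out as:
#   '<timestamp-hex8> <digest> <op> <rev> <branch|*> <ntags> <tag>... <nbranches> <branch>... <fname>'
# for example:
#   '3a2e1b40 <40-hex-digest> C 1.3 * 1 SOME_TAG 0 /cvs/proj/foo.c,v'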
| def parse_revs_line(line): |
| data = line.split(' ', 6) |
| timestamp = int(data[0], 16) |
| id = data[1] |
| op = data[2] |
| rev = data[3] |
| branch_name = data[4] |
| if branch_name == "*": |
| branch_name = None |
| ntags = int(data[5]) |
| tags = data[6].split(' ', ntags + 1) |
| nbranches = int(tags[ntags]) |
| branches = tags[ntags + 1].split(' ', nbranches) |
| fname = branches[nbranches][:-1] # strip \n |
| tags = tags[:ntags] |
| branches = branches[:nbranches] |
| |
| return timestamp, id, op, rev, fname, branch_name, tags, branches |
| |
| def write_revs_line(output, timestamp, digest, op, revision, fname, |
| branch_name, tags, branches): |
| output.write('%08lx %s %s %s ' % (timestamp, digest, op, revision)) |
| if not branch_name: |
| branch_name = "*" |
  output.write('%s ' % branch_name)
  output.write('%d ' % len(tags))
  for tag in tags:
    output.write('%s ' % tag)
  output.write('%d ' % len(branches))
  for branch in branches:
    output.write('%s ' % branch)
  output.write('%s\n' % fname)
| |
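# The conversion is split into restartable passes (see -p):
#   pass 1: parse every ,v file into the .revs/.tags/.resync data files
#   pass 2: apply the .resync timestamp fixups, producing .c-revs
#   pass 3: sort .c-revs by timestamp into .s-revs
#   pass 4: sweep .s-revs, aggregating revisions into SVN commits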
| def pass1(ctx): |
| cd = CollectData(ctx.cvsroot, DATAFILE) |
| p = rcsparse.Parser() |
| stats = [ 0 ] |
| os.path.walk(ctx.cvsroot, visit_file, (cd, p, stats)) |
| if ctx.verbose: |
| print 'processed', stats[0], 'files' |
| |
| def pass2(ctx): |
| "Pass 2: clean up the revision information." |
| |
| # We may have recorded some changes in revisions' timestamp. We need to |
| # scan for any other files which may have had the same log message and |
| # occurred at "the same time" and change their timestamps, too. |
| |
| # read the resync data file |
| resync = read_resync(ctx.log_fname_base + RESYNC_SUFFIX) |
| |
| output = open(ctx.log_fname_base + CLEAN_REVS_SUFFIX, 'w') |
| |
| # process the revisions file, looking for items to clean up |
| for line in fileinput.FileInput(ctx.log_fname_base + REVS_SUFFIX): |
| timestamp, digest, op, rev, fname, branch_name, tags, branches = \ |
| parse_revs_line(line) |
| if not resync.has_key(digest): |
| output.write(line) |
| continue |
| |
| # we have a hit. see if this is "near" any of the resync records we |
| # have recorded for this digest [of the log message]. |
| for record in resync[digest]: |
| if record[0] <= timestamp <= record[1]: |
| # bingo! remap the time on this (record[2] is the new time). |
| write_revs_line(output, record[2], digest, op, rev, fname, |
| branch_name, tags, branches) |
| |
| print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \ |
| % (relative_name(ctx.cvsroot, fname), |
| rev, time.ctime(timestamp), time.ctime(record[2])) |
| |
| # adjust the time range. we want the COMMIT_THRESHOLD from the |
| # bounds of the earlier/latest commit in this group. |
| record[0] = min(record[0], timestamp - COMMIT_THRESHOLD/2) |
| record[1] = max(record[1], timestamp + COMMIT_THRESHOLD/2) |
| |
| # stop looking for hits |
| break |
| else: |
| # the file/rev did not need to have its time changed. |
| output.write(line) |
| |
| def pass3(ctx): |
| # sort the log files |
| os.system('sort %s > %s' % (ctx.log_fname_base + CLEAN_REVS_SUFFIX, |
| ctx.log_fname_base + SORTED_REVS_SUFFIX)) |
| |
| def pass4(ctx): |
| # create the target repository |
| if not ctx.dry_run: |
| if ctx.create_repos: |
| t_repos = repos.svn_repos_create(ctx.target, None, None, None, ctx.pool) |
| else: |
| t_repos = repos.svn_repos_open(ctx.target, ctx.pool) |
| t_fs = repos.svn_repos_fs(t_repos) |
| else: |
| t_fs = t_repos = None |
| |
| # process the logfiles, creating the target |
| commits = { } |
| count = 0 |
| |
| for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX): |
| timestamp, id, op, rev, fname, branch_name, tags, branches = \ |
| parse_revs_line(line) |
| |
| # scan for commits to process |
| process = [ ] |
| for scan_id, scan_c in commits.items(): |
| |
      # ISSUE: the has_file() check below is not optimal.
      # it does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits. The correct fix would require tracking the dependencies
      # between change sets and committing them in proper order.
| if scan_c.t_max + COMMIT_THRESHOLD < timestamp or \ |
| scan_c.has_file(fname): |
| process.append((scan_c.t_max, scan_c)) |
| del commits[scan_id] |
| |
| # sort the commits into time-order, then commit 'em |
| process.sort() |
| for t_max, c in process: |
| c.commit(t_fs, ctx) |
| count = count + len(process) |
| |
| # add this item into the set of commits we're assembling |
| if commits.has_key(id): |
| c = commits[id] |
| else: |
| c = commits[id] = Commit() |
| c.add(timestamp, op, fname, rev, branch_name, tags, branches) |
| |
| # if there are any pending commits left, then flush them |
| if commits: |
| process = [ ] |
| for id, c in commits.items(): |
| process.append((c.t_max, c)) |
| process.sort() |
| for t_max, c in process: |
| c.commit(t_fs, ctx) |
| count = count + len(process) |
| |
| if ctx.verbose: |
| print count, 'commits processed.' |
| |
| _passes = [ |
| pass1, |
| pass2, |
| pass3, |
| pass4, |
| ] |
| |
| class _ctx: |
| pass |
| |
| def convert(pool, ctx, start_pass=1): |
| "Convert a CVS repository to an SVN repository." |
| |
| ctx.pool = pool |
| |
| times = [ None ] * len(_passes) |
| for i in range(start_pass - 1, len(_passes)): |
| times[i] = time.time() |
| if verbose: |
| print '----- pass %d -----' % (i + 1) |
| _passes[i](ctx) |
| times.append(time.time()) |
| |
| if verbose: |
| for i in range(start_pass, len(_passes)+1): |
| print 'pass %d: %d seconds' % (i, int(times[i] - times[i-1])) |
| print ' total:', int(times[len(_passes)] - times[start_pass-1]), 'seconds' |
| |
| def usage(ctx): |
| print 'USAGE: %s [-n] [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \ |
| % os.path.basename(sys.argv[0]) |
| print ' -n dry run. parse CVS repos, but do not construct SVN repos.' |
| print ' -v verbose.' |
| print ' -s PATH path for SVN repos.' |
| print ' -p NUM start at pass NUM of %d.' % len(_passes) |
| print ' --create create a new SVN repository' |
| print ' --trunk=PATH path for trunk (default: %s)' % ctx.trunk_base |
| print ' --branches=PATH path for branches (default: %s)' % ctx.branches_base |
| print ' --tags=PATH path for tags (default: %s)' % ctx.tags_base |
| print ' --encoding=ENC encoding of log messages in CVS repos (default: %s)' % ctx.encoding |
| sys.exit(1) |
| |
| def main(): |
| # prepare the operation context |
| ctx = _ctx() |
| ctx.cvsroot = None |
| ctx.target = SVNROOT |
| ctx.log_fname_base = DATAFILE |
| ctx.verbose = 0 |
| ctx.dry_run = 0 |
| ctx.create_repos = 0 |
| ctx.trunk_base = "/trunk" |
| ctx.tags_base = "/tags" |
| ctx.branches_base = "/branches" |
| ctx.encoding = "ascii" |
| |
| try: |
| opts, args = getopt.getopt(sys.argv[1:], 'p:s:vn', |
| [ "create", "trunk=", "branches=", "tags=", "encoding=" ]) |
| except getopt.GetoptError: |
| usage(ctx) |
| if len(args) != 1: |
| usage(ctx) |
| |
| ctx.cvsroot = args[0] |
| start_pass = 1 |
| |
| for opt, value in opts: |
| if opt == '-p': |
| start_pass = int(value) |
| if start_pass < 1 or start_pass > len(_passes): |
| print 'ERROR: illegal value (%d) for starting pass. ' \ |
| 'must be 1 through %d.' % (start_pass, len(_passes)) |
| sys.exit(1) |
| elif opt == '-v': |
| ctx.verbose = 1 |
| elif opt == '-n': |
| ctx.dry_run = 1 |
| elif opt == '-s': |
| ctx.target = value |
| elif opt == '--create': |
| ctx.create_repos = 1 |
| elif opt == '--trunk': |
| ctx.trunk_base = value |
| elif opt == '--branches': |
| ctx.branches_base = value |
| elif opt == '--tags': |
| ctx.tags_base = value |
| elif opt == '--encoding': |
| ctx.encoding = value |
| |
| util.run_app(convert, ctx, start_pass=start_pass) |
| |
| if __name__ == '__main__': |
| main() |