|  | #!/usr/bin/python | 
|  |  | 
|  | import sys | 
|  | import re | 
|  | import string | 
|  |  | 
# Matches one "Name: value" header line.  The name group is non-greedy so
# the split happens at the FIRST colon: a value (for example a path) may
# itself contain ':' and must not be absorbed into the header name.
header_re = re.compile(r'^(.*?): ?(.*)$')
|  |  | 
class NodePath(object):
    """A node path together with the header fields that described it."""

    def __init__(self, path, headers):
        """Store the node's path and its mapping of header name -> value."""
        self.path = path
        self.headers = headers

    def dump(self):
        """Write the path, then its headers sorted by name, to stdout.

        The path line is indented three spaces and every ``name: value``
        line six spaces.
        """
        # sys.stdout.write() and sorted() behave identically on Python 2
        # and 3, unlike the print statement and dict.keys().sort().
        write = sys.stdout.write
        write(' ' * 3 + self.path + '\n')
        for header in sorted(self.headers):
            write(' ' * 6 + header + ': ' + self.headers[header] + '\n')
|  |  | 
|  |  | 
def dump_revision(rev, nodepaths):
    """Print one revision's summary with its nodes ordered by path.

    rev is the revision number as a string.  nodepaths maps each path to
    an object providing a dump() method.  Progress messages go to stderr;
    the summary itself goes to stdout.
    """
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    sys.stdout.write('Revision ' + rev + '\n')
    # sorted() works on Python 2 and 3 alike; on Python 3 dict.keys()
    # returns a view that has no sort() method.
    for path in sorted(nodepaths):
        nodepaths[path].dump()
    sys.stderr.write('done\n')
|  |  | 
|  |  | 
|  |  | 
def parse_header_block(fp):
    """Read one block of ``Name: value`` header lines from fp.

    Reading stops at the first blank line or at end of file.  fp may be a
    binary or a text stream; on Python 3, bytes lines are decoded as UTF-8
    so that the returned names and values are always text.

    Returns a (headers, eof) tuple, where headers maps each header name to
    its value and eof is 1 if end of file was reached, 0 otherwise.

    Raises Exception if a non-blank line contains no colon.
    """
    headers = {}
    while True:
        line = fp.readline()
        # readline() yields '' (or b'' for binary streams) only at EOF.
        if not line:
            return headers, 1
        # On Python 2 bytes is str, so only Python 3 bytes get decoded.
        if bytes is not str and isinstance(line, bytes):
            line = line.decode('utf-8', 'replace')
        line = line.strip()
        if not line:
            return headers, 0
        # Split at the FIRST colon so that values which themselves contain
        # ':' (such as paths) are kept intact.
        name, colon, value = line.partition(':')
        if not colon:
            raise Exception('Malformed header block')
        # A single space after the colon is a separator, not part of the value.
        if value.startswith(' '):
            value = value[1:]
        headers[name] = value
|  |  | 
|  |  | 
def parse_file(fp):
    """Read a dumpfile from fp and dump each revision found in it.

    Header blocks are read one at a time.  A block with a
    'Revision-number' header starts a new revision; a block with a
    'Node-path' header adds a node to the current revision.  The content
    that follows a block is skipped using its declared lengths.  Each
    revision is dumped when the next one starts or at end of file.

    Raises Exception if, after the first revision has been seen, a
    non-empty header block is neither a revision nor a node block.
    """
    nodepaths = {}
    current_rev = None

    while True:
        # Parse a block of headers
        headers, eof = parse_header_block(fp)

        # This is a revision header block
        if 'Revision-number' in headers:

            # If there was a previous revision, dump it
            if current_rev:
                dump_revision(current_rev, nodepaths)

            # Reset the data for this revision
            current_rev = headers['Revision-number']
            nodepaths = {}

            # Skip the contents
            prop_len = headers.get('Prop-content-length', 0)
            fp.read(int(prop_len))

        # This is a node header block
        elif 'Node-path' in headers:

            # Make a new NodePath object, and add it to the
            # dictionary thereof
            path = headers['Node-path']
            nodepaths[path] = NodePath(path, headers)

            # Skip the content
            text_len = headers.get('Text-content-length', 0)
            prop_len = headers.get('Prop-content-length', 0)
            fp.read(int(text_len) + int(prop_len))

        # Not a revision, not a node -- if we've already seen at least
        # one revision block, we are in an errorful state.  (Blocks seen
        # before the first revision are ignored.)
        elif current_rev and headers:
            raise Exception('Header block from outta nowhere')

        if eof:
            # Flush the final revision, which has no successor to trigger it.
            if current_rev:
                dump_revision(current_rev, nodepaths)
            break
|  |  | 
def usage():
    """Write the command-line help text to stdout and exit with status 0."""
    lines = [
        'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
        '',
        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
        'from stdin) and normalizes the metadata contained therein,',
        'printing summarized and sorted information.  This is useful for',
        'generating data about dumpfiles in a diffable fashion.',
    ]
    # A single write() replaces the Python 2-only print statements.
    sys.stdout.write('\n'.join(lines) + '\n')
    sys.exit(0)
|  |  | 
def main():
    """Script entry point: normalize the dumpfile named in argv, or stdin.

    With '--help' as the first argument the usage text is printed and the
    process exits.  Otherwise the first argument is opened as the dumpfile;
    with no arguments the dumpfile is read from standard input.
    """
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        fp = open(sys.argv[1], 'rb')
        # Close the dumpfile even if parsing raises.
        try:
            parse_file(fp)
        finally:
            fp.close()
    else:
        # Content lengths in the headers are byte counts, so prefer the
        # underlying binary stream where sys.stdin exposes one (Python 3).
        parse_file(getattr(sys.stdin, 'buffer', sys.stdin))


if __name__ == '__main__':
    main()
|  |  | 
|  |  | 
|  |  | 
|  |  |