blob: 883b413bd5be4f4d24ee684ad93c2f3025f963ae [file] [log] [blame]
#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
#
# Generate a report of each area each committer has touched over all time.
#
# $ svn log -v ^/ > svnlogdata
# $ ./analyze-svnlogs.py < svnlogdata > report.txt
#
# NOTE: ./logdata.py is written with a cached version of the data extracted
# from 'svnlogdata'. That data can be analyzed in many ways, beyond
# what this script is reporting.
#
import sys
import re
# Header line of one "svn log" entry, for example:
#   r123 | author | 2009-01-01 12:00:00 +0000 (Thu, 01 Jan 2009) | 3 lines
# Groups: 1 = revision (with the leading "r"), 2 = author, 3 = date field,
# 4 = number of lines in the log message.
# Raw strings are used so that "\|" is a regex escape and not an invalid
# string escape sequence.
RE_LOG_HEADER = re.compile(r'^(r[0-9]+) '
                           r'\| ([^|]+) '
                           r'\| ([^|]+) '
                           r'\| ([0-9]+) line')

# One entry of the "Changed paths:" list: leading indentation, an action
# letter (M, A, R or D), the path (group 1) and an optional
# " (from SRC:REV)" copy-source suffix (group 2).
# One or more leading spaces are accepted, so the pattern works no matter
# how deeply the entries are indented.
RE_PATH = re.compile(r' +[MARD] (.*?)( \(from .*\))?$')

# The dashed ruler that precedes every log entry (and ends the log).
SEPARATOR = '-' * 72
def parse_one_commit(logfile):
    """Read one log entry from LOGFILE and return (author, paths).

    LOGFILE is any object with a readline() method, positioned on the
    separator line that precedes an entry.  PATHS is the set of paths
    listed under "Changed paths:" for that entry (empty if the entry has
    no such list).

    Returns (None, None) when nothing follows the separator, i.e. at the
    end of the log.

    Raises ParseError if the separator is missing, the header line cannot
    be parsed, the line after the header is neither blank nor the start of
    a changed-paths list, or a changed-path line is malformed.
    """
    ruler = logfile.readline().strip()
    if ruler != SEPARATOR:
        raise ParseError('missing separator: %s' % ruler)

    header = logfile.readline()
    if not header:
        # end of file!
        return None, None

    header_match = RE_LOG_HEADER.match(header)
    if header_match is None:
        raise ParseError('could not match log header')
    revision, author = header_match.group(1, 2)
    message_lines = int(header_match.group(4))
    paths = set()

    # The line after the header is either "Changed paths:" or, when the
    # entry lists no paths, the blank line preceding the log message.
    marker = logfile.readline().strip()
    if marker:
        if not marker.startswith('Changed'):
            raise ParseError('log not run with -v. paths missing in %s'
                             % revision)

        # Gather the affected paths; a blank line (or end of file) ends
        # the list.
        for raw in iter(logfile.readline, ''):
            entry = raw.rstrip()
            if not entry:
                break
            path_match = RE_PATH.match(entry)
            if path_match is None:
                raise ParseError('bad path in %s: %s' % (revision, entry))
            paths.add(path_match.group(1))
    else:
        # No paths at all; report the oddity and carry on.
        sys.stderr.write('Funny revision: %s\n' % revision)

    # Skip over the log message; its length comes from the header.
    for _ in range(message_lines):
        logfile.readline()

    return author, paths
def parse_file(logfile):
    """Parse a whole log from LOGFILE and return a dict of author -> paths.

    Entries are read with parse_one_commit() until it reports the end of
    the log.  Each value of the returned dict is the set of all paths
    listed in any entry by that author.  Errors raised by
    parse_one_commit() propagate to the caller.
    """
    authors = {}
    while True:
        author, paths = parse_one_commit(logfile)
        if author is None:
            return authors
        # Merge in place: building a fresh union for every commit would
        # re-copy the author's whole accumulated set each time.
        authors.setdefault(author, set()).update(paths)
def write_logdata(authors):
    """Write AUTHORS to 'logdata.py' in the current directory.

    AUTHORS maps an author name to a collection of path strings.  The
    generated file is a Python module that defines a single name,
    'authors', as a dict of author -> set of paths, so that it can be
    imported later instead of re-parsing the log.

    Authors and paths are both written with repr(), which keeps the
    generated module valid even when a name contains quotes or
    backslashes.  Entries are written in sorted order so the output is
    reproducible.
    """
    # The 'with' block guarantees the file is flushed and closed, even if
    # a write fails part-way through.
    with open('logdata.py', 'w') as out:
        out.write('authors = {\n')
        for author, paths in sorted(authors.items()):
            out.write(" %r: set([\n" % (author,))
            for path in sorted(paths):
                out.write(' %s,\n' % repr(path))
            out.write(' ]),\n')
        out.write('}\n')
def get_key(sectionroots, path):
    """Return the tuple of leading path components that PATH is filed under.

    SECTIONROOTS is a sequence of directory paths (each starting with
    '/'); the first one that contains PATH wins, so more specific roots
    must be listed before their parents.  For a path inside a section the
    key is the section's components plus one more component below it.
    For any other path the key is the first two components.  A key is
    shorter when PATH does not have that many components.

    A section matches only on a component boundary: '/trunk/doc' contains
    '/trunk/doc' and '/trunk/doc/x', but not '/trunk/docs/x'.
    """
    for section in sectionroots:
        # A bare startswith(section) would also match sibling directories
        # whose name merely begins with the section's last component.
        boundary = section if section.endswith('/') else section + '/'
        if path == section or path.startswith(boundary):
            # add one path element below top section to the key.
            depth = len(section.split('/')) + 1
            # strip first element (always empty because path starts
            # with '/')
            return tuple(path.split('/', depth)[1:depth])

    # Not in any section: use the top two components, again dropping the
    # empty element in front of the leading '/'.
    return tuple(path.split('/', 3)[1:3])
def print_report(authors, sectionroots=[ ]):
    """Print, for every author, a summary of the areas they have touched.

    AUTHORS maps an author name to the collection of paths they changed.
    SECTIONROOTS is passed to get_key() to decide how each path is
    grouped; it is only read here, never modified.

    Authors are printed in sorted order.  Under each author, every area
    is printed with the number of paths in it, except that areas whose
    first component is 'tags' or 'branches' are collected and printed as
    a single TAGS / BRANCHES line each.  A blank line ends each author.
    """
    for author, touched in sorted(authors.items()):
        # Count the paths that fall into each area.
        counts = {}
        for path in touched:
            area = get_key(sectionroots, path)
            counts[area] = counts.get(area, 0) + 1

        print(author)
        tag_names = []
        branch_names = []
        for area in sorted(counts):
            if len(area) == 1:
                # A one-component key is a path directly under the root.
                assert counts[area] == 1
                print(' %s (ROOT)' % area[0])
                continue

            top = area[0]
            if top == 'tags':
                if area[1] not in tag_names:
                    tag_names.append(area[1])
            elif top == 'branches':
                if area[1] not in branch_names:
                    branch_names.append(area[1])
            else:
                print(' %s (%d items)' % ('/'.join(area), counts[area]))

        if tag_names:
            print(' TAGS: %s' % ', '.join(tag_names))
        if branch_names:
            print(' BRANCHES: %s' % ', '.join(branch_names))

        print('')
def run(logfile):
    """Print the per-author report, using cached data when available.

    If a 'logdata' module can be imported, its 'authors' dict is used and
    LOGFILE is not read.  Otherwise LOGFILE is parsed with parse_file()
    and the result is saved with write_logdata() for later runs.
    """
    # Directories that get one extra level of detail in the report.  More
    # specific roots come before their parents so they are matched first.
    sectionroots = [
        '/trunk/subversion/include/private',
        '/trunk/subversion/include',
        '/trunk/subversion/tests',
        '/trunk/subversion',
        '/trunk/tools',
        '/trunk/contrib',
        '/trunk/doc',
    ]

    try:
        import logdata
        authors = logdata.authors
    except ImportError:
        # No cache yet: parse the log and write the cache.
        authors = parse_file(logfile)
        write_logdata(authors)

    print_report(authors, sectionroots)
class ParseError(Exception):
    """Raised when the input cannot be parsed as a log."""
if __name__ == '__main__':
    # Read the log from the file named by the first command-line argument,
    # or from standard input when no argument is given.
    if len(sys.argv) > 1:
        # Close the file as soon as the report has been produced (or an
        # error is raised) instead of leaving it open until exit.
        with open(sys.argv[1]) as logfile:
            run(logfile)
    else:
        run(sys.stdin)