| #!/usr/bin/env python |
| # |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # |
| |
| # See usage() for details, or run with --help option. |
| # |
| # .-------------------------------------------------. |
| # | "An ad hoc format deserves an ad hoc parser." | |
| # `-------------------------------------------------' |
| # |
| # Some Subversion project log messages include parseable data to help |
| # track who's contributing what. The exact syntax is described in |
| # http://subversion.apache.org/docs/community-guide/conventions.html#crediting, |
| # but here's an example, indented by three spaces, i.e., the "Patch by:" |
| # starts at the beginning of a line: |
| # |
| # Patch by: David Anderson <david.anderson@calixo.net> |
| # <justin@erenkrantz.com> |
| # me |
| # (I wrote the regression tests.) |
| # Found by: Phineas T. Phinder <phtph@ph1nderz.com> |
| # Suggested by: Snosbig Q. Ptermione <sqptermione@example.com> |
| # Review by: Justin Erenkrantz <justin@erenkrantz.com> |
| # rooneg |
| # (They caught an off-by-one error in the main loop.) |
| # |
| # This is a pathological example, but it shows all the things we might |
| # need to parse. We need to: |
| # |
| # - Detect the officially-approved "WORD by: " fields. |
| # - Grab every name (one per line) in each field. |
| # - Handle names in various formats, unifying where possible. |
| # - Expand "me" to the committer name for this revision. |
| # - Associate a parenthetical aside following a field with that field. |
| # |
| # NOTES: You might be wondering, why not take 'svn log --xml' input? |
| # Well, that would be the Right Thing to do, but in practice this was |
| # a lot easier to whip up for straight 'svn log' output. I'd have no |
| # objection to it being rewritten to take XML input. |
| |
| import functools |
| import os |
| import sys |
| import re |
| import getopt |
| try: |
| my_getopt = getopt.gnu_getopt |
| except AttributeError: |
| my_getopt = getopt.getopt |
| try: |
| # Python >=3.0 |
| from urllib.parse import quote as urllib_parse_quote |
| except ImportError: |
| # Python <3.0 |
| from urllib import quote as urllib_parse_quote |
| |
| |
| # Warnings and errors start with these strings. They are typically |
| # followed by a colon and a space, as in "%s: " ==> "WARNING: ". |
| warning_prefix = 'WARNING' |
| error_prefix = 'ERROR' |
| |
| def complain(msg, fatal=False): |
| """Print MSG as a warning, or if FATAL is true, print it as an error |
| and exit.""" |
| prefix = 'WARNING: ' |
| if fatal: |
| prefix = 'ERROR: ' |
| sys.stderr.write(prefix + msg + '\n') |
| if fatal: |
| sys.exit(1) |
| |
| |
| def html_spam_guard(addr, entities_only=False): |
| """Return a spam-protected version of email ADDR that renders the |
| same in HTML as the original address. If ENTITIES_ONLY, use a less |
| thorough mangling scheme involving entities only, avoiding the use |
| of tags.""" |
| if entities_only: |
| def mangle(x): |
| return "&#%d;" % ord (x) |
| else: |
| def mangle(x): |
| return "<span>&#%d;</span>" % ord(x) |
| return "".join(map(mangle, addr)) |
| |
| |
| def escape_html(str): |
| """Return an HTML-escaped version of STR.""" |
| return str.replace('&', '&').replace('<', '<').replace('>', '>') |
| |
| |
| _spam_guard_in_html_block_re = re.compile(r'<([^&]*@[^&]*)>') |
| def _spam_guard_in_html_block_func(m): |
| return "<%s>" % html_spam_guard(m.group(1)) |
| def spam_guard_in_html_block(str): |
| """Take a block of HTML data, and run html_spam_guard() on parts of it.""" |
| return _spam_guard_in_html_block_re.subn(_spam_guard_in_html_block_func, |
| str)[0] |
| |
| def html_header(title, page_heading=None, highlight_targets=False): |
| """Write HTML file header. TITLE and PAGE_HEADING parameters are |
| expected to already by HTML-escaped if needed. If HIGHLIGHT_TARGETS |
| is true, then write out a style header that causes anchor targets to be |
| surrounded by a red border when they are jumped to.""" |
| if not page_heading: |
| page_heading = title |
| s = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n' |
| s += ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n' |
| s += '<html><head>\n' |
| s += '<meta http-equiv="Content-Type"' |
| s += ' content="text/html; charset=UTF-8" />\n' |
| if highlight_targets: |
| s += '<style type="text/css">\n' |
| s += ':target { border: 2px solid red; }\n' |
| s += '</style>\n' |
| s += '<title>%s</title>\n' % title |
| s += '</head>\n\n' |
| s += '<body style="text-color: black; background-color: white">\n\n' |
| s += '<h1 style="text-align: center">%s</h1>\n\n' % page_heading |
| s += '<hr />\n\n' |
| return s |
| |
| |
| def html_footer(): |
| return '\n</body>\n</html>\n' |
| |
| |
| class Contributor(object): |
| # Map contributor names to contributor instances, so that there |
| # exists exactly one instance associated with a given name. |
| # Fold names with email addresses. That is, if we see someone |
| # listed first with just an email address, but later with a real |
| # name and that same email address together, we create only one |
| # instance, and store it under both the email and the real name. |
| all_contributors = { } |
| |
| def __init__(self, username, real_name, email): |
| """Instantiate a contributor. Don't use this to generate a |
| Contributor for an external caller, though, use .get() instead.""" |
| self.real_name = real_name |
| self.username = username |
| self.email = email |
| self.is_committer = False # Assume not until hear otherwise. |
| self.is_full_committer = False # Assume not until hear otherwise. |
| # Map verbs (e.g., "Patch", "Suggested", "Review") to lists of |
| # LogMessage objects. For example, the log messages stored under |
| # "Patch" represent all the revisions for which this contributor |
| # contributed a patch. |
| self.activities = { } |
| |
| def add_activity(self, field_name, log): |
| """Record that this contributor was active in FIELD_NAME in LOG.""" |
| logs = self.activities.get(field_name) |
| if not logs: |
| logs = [ ] |
| self.activities[field_name] = logs |
| if not log in logs: |
| logs.append(log) |
| |
| @staticmethod |
| def get(username, real_name, email): |
| """If this contributor is already registered, just return it; |
| otherwise, register it then return it. Hint: use parse() to |
| generate the arguments.""" |
| c = None |
| for key in username, real_name, email: |
| if key and key in Contributor.all_contributors: |
| c = Contributor.all_contributors[key] |
| break |
| # If we didn't get a Contributor, create one now. |
| if not c: |
| c = Contributor(username, real_name, email) |
| # If we know identifying information that the Contributor lacks, |
| # then give it to the Contributor now. |
| if username: |
| if not c.username: |
| c.username = username |
| Contributor.all_contributors[username] = c |
| if real_name: |
| if not c.real_name: |
| c.real_name = real_name |
| Contributor.all_contributors[real_name] = c |
| if email: |
| if not c.email: |
| c.email = email |
| Contributor.all_contributors[email] = c |
| # This Contributor has never been in better shape; return it. |
| return c |
| |
| def score(self): |
| """Return a contribution score for this contributor.""" |
| # Right now we count a patch as 2, anything else as 1. |
| score = 0 |
| for activity in self.activities.keys(): |
| if activity == 'Patch': |
| score += len(self.activities[activity]) * 2 |
| else: |
| score += len(self.activities[activity]) |
| return score |
| |
| def score_str(self): |
| """Return a contribution score HTML string for this contributor.""" |
| patch_score = 0 |
| other_score = 0 |
| for activity in self.activities.keys(): |
| if activity == 'Patch': |
| patch_score += len(self.activities[activity]) |
| else: |
| other_score += len(self.activities[activity]) |
| if patch_score == 0: |
| patch_str = "" |
| elif patch_score == 1: |
| patch_str = "1 patch" |
| else: |
| patch_str = "%d patches" % patch_score |
| if other_score == 0: |
| other_str = "" |
| elif other_score == 1: |
| other_str = "1 non-patch" |
| else: |
| other_str = "%d non-patches" % other_score |
| if patch_str: |
| if other_str: |
| return ", ".join((patch_str, other_str)) |
| else: |
| return patch_str |
| else: |
| return other_str |
| |
| def __cmp__(self, other): |
| if self.is_full_committer and not other.is_full_committer: |
| return 1 |
| if other.is_full_committer and not self.is_full_committer: |
| return -1 |
| result = cmp(self.score(), other.score()) |
| if result == 0: |
| return cmp(self.big_name(), other.big_name()) |
| else: |
| return 0 - result |
| |
| def sort_key(self): |
| return (self.is_full_committer, self.score(), self.big_name()) |
| |
| @staticmethod |
| def parse(name): |
| """Parse NAME, which can be |
| |
| - A committer username, or |
| - A space-separated real name, or |
| - A space-separated real name followed by an email address in |
| angle brackets, or |
| - Just an email address in angle brackets. |
| |
| (The email address may have '@' disguised as '{_AT_}'.) |
| |
| Return a tuple of (committer_username, real_name, email_address) |
| any of which can be None if not available in NAME.""" |
| username = None |
| real_name = None |
| email = None |
| name_components = name.split() |
| if len(name_components) == 1: |
| name = name_components[0] # Effectively, name = name.strip() |
| if name[0] == '<' and name[-1] == '>': |
| email = name[1:-1] |
| elif name.find('@') != -1 or name.find('{_AT_}') != -1: |
| email = name |
| else: |
| username = name |
| elif name_components[-1][0] == '<' and name_components[-1][-1] == '>': |
| real_name = ' '.join(name_components[0:-1]) |
| email = name_components[-1][1:-1] |
| else: |
| real_name = ' '.join(name_components) |
| |
| if email is not None: |
| # We unobfuscate here and work with the '@' internally, since |
| # we'll obfuscate it again (differently) before writing it out. |
| email = email.replace('{_AT_}', '@') |
| |
| return username, real_name, email |
| |
| def canonical_name(self): |
| """Return a canonical name for this contributor. The canonical |
| name may or may not be based on the contributor's actual email |
| address. |
| |
| The canonical name will not contain filename-unsafe characters. |
| |
| This method is guaranteed to return the same canonical name every |
| time only if no further contributions are recorded from this |
| contributor after the first call. This is because a contribution |
| may bring a new form of the contributor's name, one which affects |
| the algorithm used to construct canonical names.""" |
| retval = None |
| if self.username: |
| retval = self.username |
| elif self.email: |
| # Take some rudimentary steps to shorten the email address, to |
| # make it more manageable. If this is ever discovered to result |
| # in collisions, we can always just use to the full address. |
| try: |
| at_posn = self.email.index('@') |
| first_dot_after_at = self.email.index('.', at_posn) |
| retval = self.email[0:first_dot_after_at] |
| except ValueError: |
| retval = self.email |
| elif self.real_name: |
| # Last resort: construct canonical name based on real name. |
| retval = ''.join(self.real_name.lower().split(' ')) |
| if retval is None: |
| complain('Unable to construct a canonical name for Contributor.', True) |
| return urllib_parse_quote(retval, safe="!#$&'()+,;<=>@[]^`{}~") |
| |
| def big_name(self, html=False, html_eo=False): |
| """Return as complete a name as possible for this contributor. |
| If HTML, then call html_spam_guard() on email addresses. |
| If HTML_EO, then do the same, but specifying entities_only mode.""" |
| html = html or html_eo |
| name_bits = [] |
| if self.real_name: |
| if html: |
| name_bits.append(escape_html(self.real_name)) |
| else: |
| name_bits.append(self.real_name) |
| if self.email: |
| if not self.real_name and not self.username: |
| name_bits.append(self.email) |
| elif html: |
| name_bits.append("<%s>" % html_spam_guard(self.email, html_eo)) |
| else: |
| name_bits.append("<%s>" % self.email) |
| if self.username: |
| if not self.real_name and not self.email: |
| name_bits.append(self.username) |
| else: |
| name_bits.append("(%s)" % self.username) |
| return " ".join(name_bits) |
| |
| def __str__(self): |
| s = 'CONTRIBUTOR: ' |
| s += self.big_name() |
| s += "\ncanonical name: '%s'" % self.canonical_name() |
| if len(self.activities) > 0: |
| s += '\n ' |
| for activity in self.activities.keys(): |
| val = self.activities[activity] |
| s += '[%s:' % activity |
| for log in val: |
| s += ' %s' % log.revision |
| s += ']' |
| return s |
| |
| def html_out(self, revision_url_pattern, filename): |
| """Create an HTML file named FILENAME, showing all the revisions in which |
| this contributor was active.""" |
| out = open(filename, 'w') |
| out.write(html_header(self.big_name(html_eo=True), |
| self.big_name(html=True), True)) |
| unique_logs = { } |
| |
| sorted_activities = sorted(self.activities.keys()) |
| |
| out.write('<div class="h2" id="activities" title="activities">\n\n') |
| out.write('<table border="1">\n') |
| out.write('<tr>\n') |
| for activity in sorted_activities: |
| out.write('<td>%s</td>\n\n' % activity) |
| out.write('</tr>\n') |
| out.write('<tr>\n') |
| for activity in sorted_activities: |
| out.write('<td>\n') |
| first_activity = True |
| for log in self.activities[activity]: |
| s = ',\n' |
| if first_activity: |
| s = '' |
| first_activity = False |
| out.write('%s<a href="#%s">%s</a>' % (s, log.revision, log.revision)) |
| unique_logs[log] = True |
| out.write('</td>\n') |
| out.write('</tr>\n') |
| out.write('</table>\n\n') |
| out.write('</div>\n\n') |
| |
| sorted_logs = sorted(unique_logs.keys(), key=LogMessage.sort_key) |
| for log in sorted_logs: |
| out.write('<hr />\n') |
| out.write('<div class="h3" id="%s" title="%s">\n' % (log.revision, |
| log.revision)) |
| out.write('<pre>\n') |
| if revision_url_pattern: |
| revision_url = revision_url_pattern % log.revision[1:] |
| revision = '<a href="%s">%s</a>' \ |
| % (escape_html(revision_url), log.revision) |
| else: |
| revision = log.revision |
| out.write('<b>%s | %s | %s</b>\n\n' % (revision, |
| escape_html(log.committer), |
| escape_html(log.date))) |
| out.write(spam_guard_in_html_block(escape_html(log.message))) |
| out.write('</pre>\n') |
| out.write('</div>\n\n') |
| out.write('<hr />\n') |
| |
| out.write(html_footer()) |
| out.close() |
| |
| |
| class Field: |
| """One field in one log message.""" |
| def __init__(self, name, alias = None): |
| # The name of this field (e.g., "Patch", "Review", etc). |
| self.name = name |
| # An alias for the name of this field (e.g., "Reviewed"). |
| self.alias = alias |
| # A list of contributor objects, in the order in which they were |
| # encountered in the field. |
| self.contributors = [ ] |
| # Any parenthesized asides immediately following the field. The |
| # parentheses and trailing newline are left on. In theory, this |
| # supports concatenation of consecutive asides. In practice, the |
| # parser only detects the first one anyway, because additional |
| # ones are very uncommon and furthermore by that point one should |
| # probably be looking at the full log message. |
| self.addendum = '' |
| def add_contributor(self, contributor): |
| self.contributors.append(contributor) |
| def add_endum(self, addendum): |
| self.addendum += addendum |
| def __str__(self): |
| s = 'FIELD: %s (%d contributors)\n' % (self.name, len(self.contributors)) |
| for contributor in self.contributors: |
| s += str(contributor) + '\n' |
| s += self.addendum |
| return s |
| |
| |
| class LogMessage(object): |
| # Maps revision strings (e.g., "r12345") onto LogMessage instances, |
| # holding all the LogMessage instances ever created. |
| all_logs = { } |
| # Keep track of youngest rev. |
| max_revnum = 0 |
| def __init__(self, revision, committer, date): |
| """Instantiate a log message. All arguments are strings, |
| including REVISION, which should retain its leading 'r'.""" |
| self.revision = revision |
| self.committer = committer |
| self.date = date |
| self.message = '' |
| # Map field names (e.g., "Patch", "Review", "Suggested") onto |
| # Field objects. |
| self.fields = { } |
| if revision in LogMessage.all_logs: |
| complain("Revision '%s' seen more than once" % revision, True) |
| LogMessage.all_logs[revision] = self |
| rev_as_number = int(revision[1:]) |
| if rev_as_number > LogMessage.max_revnum: |
| LogMessage.max_revnum = rev_as_number |
| def add_field(self, field): |
| self.fields[field.name] = field |
| def accum(self, line): |
| """Accumulate one more line of raw message.""" |
| self.message += line |
| |
| def __cmp__(self, other): |
| """Compare two log messages by revision number, for sort(). |
| Return -1, 0 or 1 depending on whether a > b, a == b, or a < b. |
| Note that this is reversed from normal sorting behavior, but it's |
| what we want for reverse chronological ordering of revisions.""" |
| a = int(self.revision[1:]) |
| b = int(other.revision[1:]) |
| if a > b: return -1 |
| if a < b: return 1 |
| else: return 0 |
| |
| def sort_key(self): |
| return int(self.revision[1:]) |
| |
| def __str__(self): |
| s = '=' * 15 |
| header = ' LOG: %s | %s ' % (self.revision, self.committer) |
| s += header |
| s += '=' * 15 |
| s += '\n' |
| for field_name in self.fields.keys(): |
| s += str(self.fields[field_name]) + '\n' |
| s += '-' * 15 |
| s += '-' * len(header) |
| s += '-' * 15 |
| s += '\n' |
| return s |
| |
| |
| |
| ### Code to parse the logs. ## |
| |
| log_separator = '-' * 72 + '\n' |
| log_header_re = re.compile\ |
| ('^(r[0-9]+) \| ([^|]+) \| ([^|]+) \| ([0-9]+)[^0-9]') |
| field_re = re.compile( |
| '^(Patch|Review(ed)?|Suggested|Found|Inspired|Tested|Reported) by:' |
| '\s*\S.*$') |
| field_aliases = { |
| 'Reviewed' : 'Review', |
| 'Reported' : 'Found', |
| } |
| parenthetical_aside_re = re.compile('^\s*\(.*\)\s*$') |
| |
| def graze(input): |
| just_saw_separator = False |
| |
| while True: |
| line = input.readline() |
| if line == '': break |
| if line == log_separator: |
| if just_saw_separator: |
| sys.stderr.write('Two separators in a row.\n') |
| sys.exit(1) |
| else: |
| just_saw_separator = True |
| num_lines = None |
| continue |
| else: |
| if just_saw_separator: |
| m = log_header_re.match(line) |
| if not m: |
| sys.stderr.write('Could not match log message header.\n') |
| sys.stderr.write('Line was:\n') |
| sys.stderr.write("'%s'\n" % line) |
| sys.exit(1) |
| else: |
| log = LogMessage(m.group(1), m.group(2), m.group(3)) |
| num_lines = int(m.group(4)) |
| just_saw_separator = False |
| saw_patch = False |
| line = input.readline() |
| # Handle 'svn log -v' by waiting for the blank line. |
| while line != '\n': |
| line = input.readline() |
| # Parse the log message. |
| field = None |
| while num_lines > 0: |
| line = input.readline() |
| log.accum(line) |
| m = field_re.match(line) |
| if m: |
| # We're on the first line of a field. Parse the field. |
| while m: |
| if not field: |
| ident = m.group(1) |
| if ident in field_aliases: |
| field = Field(field_aliases[ident], ident) |
| else: |
| field = Field(ident) |
| # Each line begins either with "WORD by:", or with whitespace. |
| in_field_re = re.compile('^(' |
| + (field.alias or field.name) |
| + ' by:\s+|\s+)([^\s(].*)') |
| m = in_field_re.match(line) |
| if m is None: |
| sys.stderr.write("Error matching: %s\n" % (line)) |
| user, real, email = Contributor.parse(m.group(2)) |
| if user == 'me': |
| user = log.committer |
| c = Contributor.get(user, real, email) |
| c.add_activity(field.name, log) |
| if (field.name == 'Patch'): |
| saw_patch = True |
| field.add_contributor(c) |
| line = input.readline() |
| if line == log_separator: |
| # If the log message doesn't end with its own |
| # newline (that is, there's the newline added by the |
| # svn client, but no further newline), then just move |
| # on to the next log entry. |
| just_saw_separator = True |
| num_lines = 0 |
| break |
| log.accum(line) |
| num_lines -= 1 |
| m = in_field_re.match(line) |
| if not m: |
| m = field_re.match(line) |
| if not m: |
| aside_match = parenthetical_aside_re.match(line) |
| if aside_match: |
| field.add_endum(line) |
| log.add_field(field) |
| field = None |
| num_lines -= 1 |
| if not saw_patch and log.committer != '(no author)': |
| c = Contributor.get(log.committer, None, None) |
| c.add_activity('Patch', log) |
| continue |
| |
| index_introduction = ''' |
| <p>The following list of contributors and their contributions is meant |
| to help us keep track of whom to consider for commit access. The list |
| was generated from "svn log" output by <a |
| href="http://svn.apache.org/repos/asf/subversion/trunk/tools/dev/contribulyze.py" |
| >contribulyze.py</a>, which looks for log messages that use the <a |
| href="http://subversion.apache.org/docs/community-guide/conventions.html#crediting" |
| >special contribution format</a>.</p> |
| |
| <p><i>Please do not use this list as a generic guide to who has |
| contributed what to Subversion!</i> It omits existing <a |
| href="http://svn.apache.org/repos/asf/subversion/trunk/COMMITTERS" |
| >full committers</a>, for example, because they are irrelevant to our |
| search for new committers. Also, it merely counts changes, it does |
| not evaluate them. To truly understand what someone has contributed, |
| you have to read their changes in detail. This page can only assist |
| human judgement, not substitute for it.</p> |
| |
| ''' |
| |
| def drop(revision_url_pattern): |
| # Output the data. |
| # |
| # The data structures are all linked up nicely to one another. You |
| # can get all the LogMessages, and each LogMessage contains all the |
| # Contributors involved with that commit; likewise, each Contributor |
| # points back to all the LogMessages it contributed to. |
| # |
| # However, the HTML output is pretty simple right now. It's not take |
| # full advantage of all that cross-linking. For each contributor, we |
| # just create a file listing all the revisions contributed to; and we |
| # build a master index of all contributors, each name being a link to |
| # that contributor's individual file. Much more is possible... but |
| # let's just get this up and running first. |
| |
| for key in LogMessage.all_logs.keys(): |
| # You could print out all log messages this way, if you wanted to. |
| pass |
| # print LogMessage.all_logs[key] |
| |
| detail_subdir = "detail" |
| if not os.path.exists(detail_subdir): |
| os.mkdir(detail_subdir) |
| |
| index = open('index.html', 'w') |
| index.write(html_header('Contributors as of r%d' % LogMessage.max_revnum)) |
| index.write(index_introduction) |
| index.write('<ol>\n') |
| # The same contributor appears under multiple keys, so uniquify. |
| seen_contributors = { } |
| # Sorting alphabetically is acceptable, but even better would be to |
| # sort by number of contributions, so the most active people appear at |
| # the top -- that way we know whom to look at first for commit access |
| # proposals. |
| sorted_contributors = sorted(Contributor.all_contributors.values(), |
| key = Contributor.sort_key) |
| for c in sorted_contributors: |
| if c not in seen_contributors: |
| if c.score() > 0: |
| if c.is_full_committer: |
| # Don't even bother to print out full committers. They are |
| # a distraction from the purposes for which we're here. |
| continue |
| else: |
| committerness = '' |
| if c.is_committer: |
| committerness = ' (partial committer)' |
| urlpath = "%s/%s.html" % (detail_subdir, c.canonical_name()) |
| fname = os.path.join(detail_subdir, "%s.html" % c.canonical_name()) |
| index.write('<li><p><a href="%s">%s</a> [%s]%s</p></li>\n' |
| % (urllib_parse_quote(urlpath), |
| c.big_name(html=True), |
| c.score_str(), committerness)) |
| c.html_out(revision_url_pattern, fname) |
| seen_contributors[c] = True |
| index.write('</ol>\n') |
| index.write(html_footer()) |
| index.close() |
| |
| |
| def process_committers(committers): |
| """Read from open file handle COMMITTERS, which should be in |
| the same format as the Subversion 'COMMITTERS' file. Create |
| Contributor objects based on the contents.""" |
| line = committers.readline() |
| while line != 'Blanket commit access:\n': |
| line = committers.readline() |
| in_full_committers = True |
| matcher = re.compile('(\S+)\s+([^\(\)]+)\s+(\([^()]+\)){0,1}') |
| line = committers.readline() |
| while line: |
| # Every @-sign we see after this point indicates a committer line... |
| if line == 'Commit access for specific areas:\n': |
| in_full_committers = False |
| # ...except in the "dormant committers" area, which comes last anyway. |
| if line == 'Committers who have asked to be listed as dormant:\n': |
| in_full_committers = True |
| elif line.find('@') >= 0: |
| line = line.lstrip() |
| m = matcher.match(line) |
| user = m.group(1) |
| real_and_email = m.group(2).strip() |
| ignored, real, email = Contributor.parse(real_and_email) |
| c = Contributor.get(user, real, email) |
| c.is_committer = True |
| c.is_full_committer = in_full_committers |
| line = committers.readline() |
| |
| |
| def usage(): |
| print('USAGE: %s [-C COMMITTERS_FILE] < SVN_LOG_OR_LOG-V_OUTPUT' \ |
| % os.path.basename(sys.argv[0])) |
| print('') |
| print('Create HTML files in the current directory, rooted at index.html,') |
| print('in which you can browse to see who contributed what.') |
| print('') |
| print('The log input should use the contribution-tracking format defined') |
| print('in http://subversion.apache.org/docs/community-guide/conventions.html#crediting.') |
| print('') |
| print('Options:') |
| print('') |
| print(' -h, -H, -?, --help Print this usage message and exit') |
| print(' -C FILE Use FILE as the COMMITTERS file') |
| print(' -U URL Use URL as a Python interpolation pattern to') |
| print(' generate URLs to link revisions to some kind') |
| print(' of web-based viewer (e.g. ViewCVS). The') |
| print(' interpolation pattern should contain exactly') |
| print(' one format specifier, \'%s\', which will be') |
| print(' replaced with the revision number.') |
| print('') |
| |
| |
| def main(): |
| try: |
| opts, args = my_getopt(sys.argv[1:], 'C:U:hH?', [ 'help' ]) |
| except getopt.GetoptError as e: |
| complain(str(e) + '\n\n') |
| usage() |
| sys.exit(1) |
| |
| # Parse options. |
| revision_url_pattern = None |
| for opt, value in opts: |
| if opt in ('--help', '-h', '-H', '-?'): |
| usage() |
| sys.exit(0) |
| elif opt == '-C': |
| process_committers(open(value)) |
| elif opt == '-U': |
| revision_url_pattern = value |
| |
| # Gather the data. |
| graze(sys.stdin) |
| |
| # Output the data. |
| drop(revision_url_pattern) |
| |
| if __name__ == '__main__': |
| main() |