| #!/usr/bin/env python |
| |
| # mlpatch.py: Run with no arguments for usage |
| |
| import sys, os |
| import sgmllib |
| from htmlentitydefs import entitydefs |
| import fileinput |
| from urllib2 import urlopen |
| |
# Number of bytes requested from the HTTP response per read() call (8 KiB).
CHUNKSIZE = 8192
| |
class MyParser(sgmllib.SGMLParser):
    """Recover a plain-text patch from an HTML mailing-list archive page.

    Only markup found between the ``<!-- body="start" -->`` and
    ``<!-- body="end" -->`` comments is processed; everything outside that
    region is ignored.  Recovered text is written through :meth:`out`
    (standard output by default).

    ``<br>`` and ``<p>`` mark a *possible* end of line.  The text that
    follows is started on a new line only if it looks like the beginning of
    a patch line; otherwise it is presumed to be the continuation of a
    wrapped line and is joined to the previous text with a single space.
    """

    # Kept as an attribute because SGMLParser is called explicitly
    # (``self.baseclass.method(self, ...)``) rather than through super().
    baseclass = sgmllib.SGMLParser

    def __init__(self):
        self.baseclass.__init__(self)
        # Use a private copy so that the "nbsp" override below does not
        # modify the table shared by every user of htmlentitydefs.
        self.entitydefs = entitydefs.copy()
        self.entitydefs["nbsp"] = " "
        # True while between the body="start" / body="end" comments.
        self.inbody = False
        # True when a <br>/<p> was seen and the line break is still pending.
        self.complete_line = False
        self.discard_gathered()

    def discard_gathered(self):
        """Stop gathering anchor text and forget what was gathered."""
        self.gather_data = False
        self.gathered_data = ""

    def noop(self):
        """Do nothing."""
        pass

    def out(self, data):
        """Write *data* to standard output."""
        sys.stdout.write(data)

    def _starts_patch_line(self, data):
        """Return true if non-empty *data* looks like the start of a patch line.

        Recognised starts are the first-column markers ``+``, ``-``, space
        and ``#``, and the prefixes ``Index:``, ``@@ `` and ``======``.
        """
        return (data[0] in ('+', '-', ' ', '#')
                or data.startswith("Index:")
                or data.startswith("@@ ")
                or data.startswith("======"))

    def handle_starttag(self, tag, method, attrs):
        """Dispatch a known start tag, but only inside the message body."""
        if not self.inbody:
            return
        self.baseclass.handle_starttag(self, tag, method, attrs)

    def handle_endtag(self, tag, method):
        """Dispatch a known end tag, but only inside the message body."""
        if not self.inbody:
            return
        self.baseclass.handle_endtag(self, tag, method)

    def handle_data(self, data):
        """Emit (or gather) a run of text from the message body.

        Newlines in the HTML source are dropped; line structure comes only
        from ``<br>``/``<p>`` via ``self.complete_line``.
        """
        if not self.inbody:
            return
        data = data.replace('\n', '')
        if not data:
            return
        if self.gather_data:
            # Inside <a>...</a>: hold the text until end_a() rewrites it.
            self.gathered_data += data
            return
        if self.complete_line:
            if self._starts_patch_line(data):
                # Real new line
                self.out('\n')
            else:
                # Presume that we are wrapped
                self.out(' ')
            self.complete_line = False
        self.out(data)

    def handle_charref(self, ref):
        """Handle a character reference, but only inside the message body."""
        if not self.inbody:
            return
        self.baseclass.handle_charref(self, ref)

    def handle_entityref(self, ref):
        """Handle an entity reference, but only inside the message body."""
        if not self.inbody:
            return
        self.baseclass.handle_entityref(self, ref)

    def handle_comment(self, comment):
        """Track the body="start" / body="end" marker comments."""
        if comment == ' body="start" ':
            self.inbody = True
        elif comment == ' body="end" ':
            self.inbody = False

    def handle_decl(self, data):
        """Report a declaration found inside the message body."""
        if not self.inbody:
            return
        self.out("DECL: " + data + "\n")

    def unknown_starttag(self, tag, attrs):
        """Report a start tag that has no handler method."""
        if not self.inbody:
            return
        self.out("UNKTAG: %s %s\n" % (tag, attrs))

    def unknown_endtag(self, tag):
        """Report an end tag that has no handler method."""
        if not self.inbody:
            return
        self.out("UNKTAG: /%s\n" % (tag,))

    def do_br(self, attrs):
        """<br>: remember that a line break is pending."""
        self.complete_line = True

    def do_p(self, attrs):
        """<p>: finish any pending line, then emit a single space.

        The line holding that space is itself left pending.
        """
        if self.complete_line:
            self.out('\n')
        self.out(' ')
        self.complete_line = True

    def start_a(self, attrs):
        """<a>: start gathering the link text instead of emitting it."""
        self.gather_data = True

    def end_a(self):
        """</a>: emit the gathered link text with ``_at_`` turned into ``@``."""
        self.out(self.gathered_data.replace('_at_', '@'))
        self.discard_gathered()

    def close(self):
        """Terminate a still-pending line, then close the underlying parser."""
        if self.complete_line:
            self.out('\n')
        self.baseclass.close(self)
| |
| |
def main(argv=None):
    """Fetch one archived mailing-list message and print its demangled text.

    Args:
        argv: Full argument vector with the program name first, i.e.
            ``[prog, list, year, month, msgno]``.  Defaults to ``sys.argv``.

    Exits with status 0 after writing the usage text to stderr when no
    arguments are given, and with status 1 after an error message on stderr
    for any other wrong argument count.  Otherwise the message URL is
    written to stdout as a ``MsgUrl:`` line, followed by the parser output.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) == 1:
        sys.stderr.write(
            "usage: mlpatch.py dev|users year month msgno > foobar.patch\n"
            "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n"
            "\n"
            "Very annoyingly, the http://svn.haxx.se/ subversion mailing"
            " list archives\n"
            "mangle inline patches, and provide no raw message download"
            " facility\n"
            "(other than for an entire month's email as an mbox).\n"
            "\n"
            "So, I wrote this script, to demangle them. It's not perfect,"
            " as it has to\n"
            "guess about whitespace, but it does an acceptable job.\n")
        sys.exit(0)

    if len(argv) != 5:
        sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
                         "parameters for usage\n")
        sys.exit(1)

    list_name, year, month, msgno = argv[1:]
    url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" % (
        list_name, year, month, msgno)
    sys.stdout.write("MsgUrl: " + url + "\n")

    msgfile = urlopen(url)
    try:
        parser = MyParser()
        # Feed the page in fixed-size chunks; an empty read means EOF.
        while True:
            chunk = msgfile.read(CHUNKSIZE)
            if not chunk:
                break
            parser.feed(chunk)
        parser.close()
    finally:
        # Release the connection even if parsing fails part-way through.
        msgfile.close()
| |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()