tools/dev/mlpatch.py - subversion - Git at Google

 #!/usr/bin/env python
 #
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #

 # mlpatch.py: Run with no arguments for usage

 import sys, os
 import sgmllib
 try:
   # Python >=3.0
   from html.entities import entitydefs
   from urllib.request import urlopen as urllib_request_urlopen
 except ImportError:
   # Python <3.0
   from htmlentitydefs import entitydefs
   from urllib2 import urlopen as urllib_request_urlopen
 import fileinput

 # Number of bytes requested from the HTTP response per read() call in main().
 CHUNKSIZE = 8 * 1024

 class MyParser(sgmllib.SGMLParser):
   """Write the body of an archived mailing-list message to stdout.

   Markup and text are ignored until a comment reading ' body="start" ' is
   seen, and again after ' body="end" '.  Inside the body, newlines in
   character data are dropped; <br> and <p> tags mark where a line may have
   ended, and handle_data() decides whether the text that follows starts a
   new line or continues a wrapped one.  Text inside <a> elements is
   gathered and written with "_at_" replaced by "@".
   """

   # Text that follows a line break is treated as a real new line only when
   # it begins with one of these prefixes.
   _NEW_LINE_PREFIXES = ('+', '-', ' ', '#', "Index:", "@@ ", "======")

   def __init__(self):
     """Initialise the base parser and the body/line tracking state."""
     # Kept on the instance so the overrides below can call the base
     # implementation explicitly.
     self.baseclass = sgmllib.SGMLParser
     self.baseclass.__init__(self)
     # Use a private copy: adding "nbsp" to the imported table itself would
     # change that module-level dict for everything else that uses it.
     self.entitydefs = dict(entitydefs)
     self.entitydefs["nbsp"] = " "
     self.inbody = False         # True between the body start/end comments
     self.complete_line = False  # True after <br>/<p> until handle_data writes text
     self.discard_gathered()

   def discard_gathered(self):
     """Stop gathering and forget any text gathered so far."""
     self.gather_data = False
     self.gathered_data = ""

   def noop(self):
     """Do nothing."""
     pass

   def out(self, data):
     """Write data to standard output without adding a newline."""
     sys.stdout.write(data)

   def handle_starttag(self, tag, method, attrs):
     """Pass a start tag on to the base class, but only inside the body."""
     if not self.inbody: return
     self.baseclass.handle_starttag(self, tag, method, attrs)

   def handle_endtag(self, tag, method):
     """Pass an end tag on to the base class, but only inside the body."""
     if not self.inbody: return
     self.baseclass.handle_endtag(self, tag, method)

   def handle_data(self, data):
     """Write (or gather) a run of character data found inside the body.

     Newlines are removed first and empty results are dropped.  While an
     <a> element is open the text is only appended to gathered_data.
     Otherwise, if a line break is pending, a newline is written when the
     text starts with one of _NEW_LINE_PREFIXES and a single space when it
     does not (the line is presumed to have been wrapped).
     """
     if not self.inbody: return
     data = data.replace('\n', '')
     if not data: return
     if self.gather_data:
       self.gathered_data += data
       return
     if self.complete_line:
       if data.startswith(self._NEW_LINE_PREFIXES):
         # Real new line
         self.out('\n')
       else:
         # Presume that we are wrapped
         self.out(' ')
     self.complete_line = False
     self.out(data)

   def handle_charref(self, ref):
     """Pass a character reference on to the base class inside the body."""
     if not self.inbody: return
     self.baseclass.handle_charref(self, ref)

   def handle_entityref(self, ref):
     """Pass an entity reference on to the base class inside the body."""
     if not self.inbody: return
     self.baseclass.handle_entityref(self, ref)

   def handle_comment(self, comment):
     """Switch body tracking on or off at the start/end marker comments."""
     if comment == ' body="start" ':
       self.inbody = True
     elif comment == ' body="end" ':
       self.inbody = False

   def handle_decl(self, data):
     """Report a declaration seen inside the body on stdout."""
     if not self.inbody: return
     print("DECL: " + data)

   def unknown_starttag(self, tag, attrs):
     """Report an unhandled start tag seen inside the body on stdout."""
     if not self.inbody: return
     print("UNKTAG: %s %s" % (tag, attrs))

   def unknown_endtag(self, tag):
     """Report an unhandled end tag seen inside the body on stdout."""
     if not self.inbody: return
     print("UNKTAG: /%s" % (tag))

   def do_br(self, attrs):
     """Record that a line break is pending."""
     self.complete_line = True

   def do_p(self, attrs):
     """Write a line consisting of one space and leave a break pending."""
     if self.complete_line:
       self.out('\n')
     self.out(' ')
     self.complete_line = True

   def start_a(self, attrs):
     """Start gathering text instead of writing it."""
     self.gather_data = True

   def end_a(self):
     """Write the gathered text with "_at_" turned into "@", then reset."""
     self.out(self.gathered_data.replace('_at_', '@'))
     self.discard_gathered()

   def close(self):
     """Write the newline for a pending line break, then close the base parser."""
     if self.complete_line:
       self.out('\n')
     self.baseclass.close(self)


 def main():
   """Fetch one archived list message and print its demangled body.

   Reads its parameters from sys.argv.  With no arguments the usage text is
   written to stderr and the process exits with status 0; with any other
   wrong number of arguments an error is written to stderr and the process
   exits with status 1.  Otherwise the message page is requested from
   http://svn.haxx.se/, a "MsgUrl: ..." line is printed, and the response
   is fed to a MyParser in CHUNKSIZE-sized reads.
   """
   argc = len(sys.argv)
   if argc == 1:
     sys.stderr.write(
     "usage:   mlpatch.py dev|users year month msgno > foobar.patch\n" +
     "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
     """
     Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
     mangle inline patches, and provide no raw message download facility
     (other than for an entire month's email as an mbox).

     So, I wrote this script, to demangle them. It's not perfect, as it has to
     guess about whitespace, but it does an acceptable job.\n""")
     sys.exit(0)
   if argc != 5:
     sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
     + "parameters for usage\n")
     sys.exit(1)

   listname, year, month, msgno = sys.argv[1:]
   url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" % (
       listname, year, month, msgno)
   print("MsgUrl: " + url)
   msgfile = urllib_request_urlopen(url)
   try:
     p = MyParser()
     chunk = msgfile.read(CHUNKSIZE)
     while chunk:
       p.feed(chunk)
       chunk = msgfile.read(CHUNKSIZE)
     p.close()
   finally:
     # Release the connection even if reading or parsing fails part-way.
     msgfile.close()

 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == '__main__':
   main()
	#!/usr/bin/env python
	#
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#

	# mlpatch.py: Run with no arguments for usage

	import sys, os
	import sgmllib
	try:
	# Python >=3.0
	from html.entities import entitydefs
	from urllib.request import urlopen as urllib_request_urlopen
	except ImportError:
	# Python <3.0
	from htmlentitydefs import entitydefs
	from urllib2 import urlopen as urllib_request_urlopen
	import fileinput

	# Number of bytes requested from the HTTP response per read() call in main().
	CHUNKSIZE = 8 * 1024

	class MyParser(sgmllib.SGMLParser):
	  """Write the body of an archived mailing-list message to stdout.

	  Markup and text are ignored until a comment reading ' body="start" ' is
	  seen, and again after ' body="end" '.  Inside the body, newlines in
	  character data are dropped; <br> and <p> tags mark where a line may have
	  ended, and handle_data() decides whether the text that follows starts a
	  new line or continues a wrapped one.  Text inside <a> elements is
	  gathered and written with "_at_" replaced by "@".
	  """

	  # Text that follows a line break is treated as a real new line only when
	  # it begins with one of these prefixes.
	  _NEW_LINE_PREFIXES = ('+', '-', ' ', '#', "Index:", "@@ ", "======")

	  def __init__(self):
	    """Initialise the base parser and the body/line tracking state."""
	    # Kept on the instance so the overrides below can call the base
	    # implementation explicitly.
	    self.baseclass = sgmllib.SGMLParser
	    self.baseclass.__init__(self)
	    # Use a private copy: adding "nbsp" to the imported table itself would
	    # change that module-level dict for everything else that uses it.
	    self.entitydefs = dict(entitydefs)
	    self.entitydefs["nbsp"] = " "
	    self.inbody = False         # True between the body start/end comments
	    self.complete_line = False  # True after <br>/<p> until handle_data writes text
	    self.discard_gathered()

	  def discard_gathered(self):
	    """Stop gathering and forget any text gathered so far."""
	    self.gather_data = False
	    self.gathered_data = ""

	  def noop(self):
	    """Do nothing."""
	    pass

	  def out(self, data):
	    """Write data to standard output without adding a newline."""
	    sys.stdout.write(data)

	  def handle_starttag(self, tag, method, attrs):
	    """Pass a start tag on to the base class, but only inside the body."""
	    if not self.inbody: return
	    self.baseclass.handle_starttag(self, tag, method, attrs)

	  def handle_endtag(self, tag, method):
	    """Pass an end tag on to the base class, but only inside the body."""
	    if not self.inbody: return
	    self.baseclass.handle_endtag(self, tag, method)

	  def handle_data(self, data):
	    """Write (or gather) a run of character data found inside the body.

	    Newlines are removed first and empty results are dropped.  While an
	    <a> element is open the text is only appended to gathered_data.
	    Otherwise, if a line break is pending, a newline is written when the
	    text starts with one of _NEW_LINE_PREFIXES and a single space when it
	    does not (the line is presumed to have been wrapped).
	    """
	    if not self.inbody: return
	    data = data.replace('\n', '')
	    if not data: return
	    if self.gather_data:
	      self.gathered_data += data
	      return
	    if self.complete_line:
	      if data.startswith(self._NEW_LINE_PREFIXES):
	        # Real new line
	        self.out('\n')
	      else:
	        # Presume that we are wrapped
	        self.out(' ')
	    self.complete_line = False
	    self.out(data)

	  def handle_charref(self, ref):
	    """Pass a character reference on to the base class inside the body."""
	    if not self.inbody: return
	    self.baseclass.handle_charref(self, ref)

	  def handle_entityref(self, ref):
	    """Pass an entity reference on to the base class inside the body."""
	    if not self.inbody: return
	    self.baseclass.handle_entityref(self, ref)

	  def handle_comment(self, comment):
	    """Switch body tracking on or off at the start/end marker comments."""
	    if comment == ' body="start" ':
	      self.inbody = True
	    elif comment == ' body="end" ':
	      self.inbody = False

	  def handle_decl(self, data):
	    """Report a declaration seen inside the body on stdout."""
	    if not self.inbody: return
	    print("DECL: " + data)

	  def unknown_starttag(self, tag, attrs):
	    """Report an unhandled start tag seen inside the body on stdout."""
	    if not self.inbody: return
	    print("UNKTAG: %s %s" % (tag, attrs))

	  def unknown_endtag(self, tag):
	    """Report an unhandled end tag seen inside the body on stdout."""
	    if not self.inbody: return
	    print("UNKTAG: /%s" % (tag))

	  def do_br(self, attrs):
	    """Record that a line break is pending."""
	    self.complete_line = True

	  def do_p(self, attrs):
	    """Write a line consisting of one space and leave a break pending."""
	    if self.complete_line:
	      self.out('\n')
	    self.out(' ')
	    self.complete_line = True

	  def start_a(self, attrs):
	    """Start gathering text instead of writing it."""
	    self.gather_data = True

	  def end_a(self):
	    """Write the gathered text with "_at_" turned into "@", then reset."""
	    self.out(self.gathered_data.replace('_at_', '@'))
	    self.discard_gathered()

	  def close(self):
	    """Write the newline for a pending line break, then close the base parser."""
	    if self.complete_line:
	      self.out('\n')
	    self.baseclass.close(self)


	def main():
	  """Fetch one archived list message and print its demangled body.

	  Reads its parameters from sys.argv.  With no arguments the usage text is
	  written to stderr and the process exits with status 0; with any other
	  wrong number of arguments an error is written to stderr and the process
	  exits with status 1.  Otherwise the message page is requested from
	  http://svn.haxx.se/, a "MsgUrl: ..." line is printed, and the response
	  is fed to a MyParser in CHUNKSIZE-sized reads.
	  """
	  argc = len(sys.argv)
	  if argc == 1:
	    sys.stderr.write(
	    "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" +
	    "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
	    """
	Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
	mangle inline patches, and provide no raw message download facility
	(other than for an entire month's email as an mbox).

	So, I wrote this script, to demangle them. It's not perfect, as it has to
	guess about whitespace, but it does an acceptable job.\n""")
	    sys.exit(0)
	  if argc != 5:
	    sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
	    + "parameters for usage\n")
	    sys.exit(1)

	  listname, year, month, msgno = sys.argv[1:]
	  url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" % (
	      listname, year, month, msgno)
	  print("MsgUrl: " + url)
	  msgfile = urllib_request_urlopen(url)
	  try:
	    p = MyParser()
	    chunk = msgfile.read(CHUNKSIZE)
	    while chunk:
	      p.feed(chunk)
	      chunk = msgfile.read(CHUNKSIZE)
	    p.close()
	  finally:
	    # Release the connection even if reading or parsing fails part-way.
	    msgfile.close()

	# Run main() only when this file is executed as a script, not when imported.
	if __name__ == '__main__':
	  main()