tools/dev/normalize-dump.py - subversion - Git at Google

 #!/usr/bin/env python
 #
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #

 import sys
 import re

 # Matches a single 'Name: value' header line.  Group 1 is everything
 # before the first colon (it may be empty); group 2 is whatever follows
 # the colon and at most one optional space after it.
 header_re = re.compile(r'^([^:]*): ?(.*)$')

 class NodePath:
     """A node record: the node's path together with its header fields.

     path    -- string printed as the record's title line by dump().
     headers -- mapping of header name to header value; dump() joins
                them with string concatenation, so both must be strings.
     """

     def __init__(self, path, headers):
         self.path = path
         self.headers = headers

     def dump(self):
         """Print the path, then each header as 'name: value'.

         The path is indented by three spaces and the headers by six;
         headers are listed in sorted order of their names so the output
         is stable from run to run.
         """
         print('   ' + self.path)
         for name in sorted(self.headers):
             print('      ' + name + ': ' + self.headers[name])


 def dump_revision(rev, nodepaths):
     """Write the normalized report for one revision.

     rev       -- the revision number, as a string.
     nodepaths -- mapping of node path to an object with a dump() method.

     A progress message is written to stderr around the work.  The report
     itself goes to stdout: a 'Revision <rev>' line followed by every
     node's dump(), visited in sorted order of path.
     """
     sys.stderr.write('* Normalizing revision ' + rev + '...')
     print('Revision ' + rev)
     for key in sorted(nodepaths):
         nodepaths[key].dump()
     sys.stderr.write('done\n')


 def parse_header_block(fp):
     """Read one block of 'Name: value' header lines from fp.

     Lines are consumed until a blank line or the end of input is reached.

     fp may be a text or a binary file object: lines that arrive as bytes
     are decoded as UTF-8 (undecodable bytes are replaced) so the returned
     names and values are always text.  This matters because the script
     opens dumpfiles in binary mode.

     Returns a tuple (headers, eof):
       headers -- dict mapping each header name to its value.
       eof     -- 1 if the block was ended by end of input, 0 if it was
                  ended by a blank line.

     Raises Exception('Malformed header block') when a non-blank line
     contains no colon.
     """
     headers = {}
     while True:
         line = fp.readline()
         if not line:
             # readline() gives '' (text) or b'' (binary) at end of input.
             return headers, 1
         if isinstance(line, bytes) and not isinstance(line, str):
             # Only true on Python 3 with a binary stream; on Python 2
             # bytes is str and the line is used as it is.
             line = line.decode('utf-8', 'replace')
         line = line.strip()
         if not line:
             return headers, 0
         # Split at the first colon; a single space after it is part of
         # the separator, anything beyond that belongs to the value.
         name, colon, value = line.partition(':')
         if not colon:
             raise Exception('Malformed header block')
         if value.startswith(' '):
             value = value[1:]
         headers[name] = value


 def parse_file(fp):
     """Walk the dumpfile read from fp and report it revision by revision.

     Header blocks are read one after another.  A block that carries
     'Revision-number' starts a new revision (the previous one, if any, is
     reported first); a block that carries 'Node-path' adds a node to the
     revision being collected.  The content following either kind of block
     is read and thrown away, using the lengths given in its headers.

     Raises Exception('Header block from outta nowhere') if, once a
     revision has been seen, a non-empty block is neither of those kinds.
     """
     pending_rev = None
     pending_nodes = {}

     at_eof = False
     while not at_eof:
         block, at_eof = parse_header_block(fp)

         if 'Revision-number' in block:
             # A new revision begins: flush the one collected so far.
             if pending_rev:
                 dump_revision(pending_rev, pending_nodes)
             pending_rev = block['Revision-number']
             pending_nodes = {}

             # The revision's properties are of no interest here.
             fp.read(int(block.get('Prop-content-length', 0)))

         elif 'Node-path' in block:
             # Record the node under its path; a later block with the
             # same path in this revision replaces the earlier one.
             node_path = block['Node-path']
             pending_nodes[node_path] = NodePath(node_path, block)

             # Step over the node's text and property content.
             text_len = int(block.get('Text-content-length', 0))
             prop_len = int(block.get('Prop-content-length', 0))
             fp.read(text_len + prop_len)

         elif pending_rev and block:
             # Blocks seen before the first revision are ignored, but
             # after that every non-empty block must be one of the above.
             raise Exception('Header block from outta nowhere')

     # Input exhausted: report whatever revision is still pending.
     if pending_rev:
         dump_revision(pending_rev, pending_nodes)

 def usage():
     """Print the command-line help text to stdout, then exit with status 0."""
     help_lines = (
         'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
         '',
         'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
         'from stdin) and normalizes the metadata contained therein,',
         'printing summarized and sorted information.  This is useful for',
         'generating data about dumpfiles in a diffable fashion.',
     )
     print('\n'.join(help_lines))
     sys.exit(0)

 def main():
     """Command-line entry point.

     With no argument the dumpfile is read from stdin.  With an argument,
     '--help' prints the usage text and exits; anything else is taken as
     the path of the dumpfile to read.
     """
     if len(sys.argv) > 1:
         if sys.argv[1] == '--help':
             usage()  # exits the process, so nothing below runs
         # Binary mode keeps fp.read(n) counting bytes, which is what the
         # content lengths in the dump headers are skipped by.  The with
         # block closes the file even if parsing raises.
         with open(sys.argv[1], 'rb') as fp:
             parse_file(fp)
     else:
         parse_file(sys.stdin)


 # Run the command-line entry point only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == '__main__':
     main()
	#!/usr/bin/env python
	#
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#

	import sys
	import re

	# Matches a single 'Name: value' header line.  Group 1 is everything
	# before the first colon (it may be empty); group 2 is whatever follows
	# the colon and at most one optional space after it.  Both groups need
	# the '*' quantifier: without it only one-character names and values
	# would match.
	header_re = re.compile(r'^([^:]*): ?(.*)$')

	class NodePath:
	    """A node record: the node's path together with its header fields.

	    path    -- string printed as the record's title line by dump().
	    headers -- mapping of header name to header value; dump() joins
	               them with string concatenation, so both must be strings.
	    """

	    def __init__(self, path, headers):
	        self.path = path
	        self.headers = headers

	    def dump(self):
	        """Print the path, then each header as 'name: value'.

	        The path is indented by three spaces and the headers by six;
	        headers are listed in sorted order of their names so the output
	        is stable from run to run.
	        """
	        print((' ' * 3) + self.path)
	        for header in sorted(self.headers):
	            print((' ' * 6) + header + ': ' + self.headers[header])


	def dump_revision(rev, nodepaths):
	    """Write the normalized report for one revision.

	    rev       -- the revision number, as a string.
	    nodepaths -- mapping of node path to an object with a dump() method.

	    A progress message is written to stderr around the work.  The report
	    itself goes to stdout: a 'Revision <rev>' line followed by every
	    node's dump(), visited in sorted order of path.
	    """
	    sys.stderr.write('* Normalizing revision ' + rev + '...')
	    print('Revision ' + rev)
	    for path in sorted(nodepaths):
	        nodepaths[path].dump()
	    sys.stderr.write('done\n')



	def parse_header_block(fp):
	    """Read one block of 'Name: value' header lines from fp.

	    Lines are consumed until a blank line or the end of input is reached.

	    fp may be a text or a binary file object: lines that arrive as bytes
	    are decoded as UTF-8 (undecodable bytes are replaced) so the returned
	    names and values are always text.  This matters because the script
	    opens dumpfiles in binary mode.

	    Returns a tuple (headers, eof):
	      headers -- dict mapping each header name to its value.
	      eof     -- 1 if the block was ended by end of input, 0 if it was
	                 ended by a blank line.

	    Raises Exception('Malformed header block') when a non-blank line
	    contains no colon.
	    """
	    headers = {}
	    while True:
	        line = fp.readline()
	        if not line:
	            # readline() gives '' (text) or b'' (binary) at end of input.
	            return headers, 1
	        if isinstance(line, bytes) and not isinstance(line, str):
	            # Only true on Python 3 with a binary stream; on Python 2
	            # bytes is str and the line is used as it is.
	            line = line.decode('utf-8', 'replace')
	        line = line.strip()
	        if not line:
	            return headers, 0
	        # Split at the first colon; a single space after it is part of
	        # the separator, anything beyond that belongs to the value.
	        name, colon, value = line.partition(':')
	        if not colon:
	            raise Exception('Malformed header block')
	        if value.startswith(' '):
	            value = value[1:]
	        headers[name] = value


	def parse_file(fp):
	    """Walk the dumpfile read from fp and report it revision by revision.

	    Header blocks are read one after another.  A block that carries
	    'Revision-number' starts a new revision (the previous one, if any, is
	    reported first); a block that carries 'Node-path' adds a node to the
	    revision being collected.  The content following either kind of block
	    is read and thrown away, using the lengths given in its headers.

	    Raises Exception('Header block from outta nowhere') if, once a
	    revision has been seen, a non-empty block is neither of those kinds.
	    """
	    nodepaths = {}
	    current_rev = None

	    while True:
	        # Parse a block of headers
	        headers, eof = parse_header_block(fp)

	        # This is a revision header block
	        if 'Revision-number' in headers:

	            # If there was a previous revision, dump it
	            if current_rev:
	                dump_revision(current_rev, nodepaths)

	            # Reset the data for this revision
	            current_rev = headers['Revision-number']
	            nodepaths = {}

	            # Skip the contents
	            prop_len = headers.get('Prop-content-length', 0)
	            fp.read(int(prop_len))

	        # This is a node header block
	        elif 'Node-path' in headers:

	            # Make a new NodePath object, and add it to the
	            # dictionary thereof
	            path = headers['Node-path']
	            node = NodePath(path, headers)
	            nodepaths[path] = node

	            # Skip the content
	            text_len = headers.get('Text-content-length', 0)
	            prop_len = headers.get('Prop-content-length', 0)
	            fp.read(int(text_len) + int(prop_len))

	        # Not a revision, not a node -- if we've already seen at least
	        # one revision block, we are in an errorful state.
	        elif current_rev and headers:
	            raise Exception('Header block from outta nowhere')

	        if eof:
	            # Input exhausted: report whatever revision is still pending.
	            if current_rev:
	                dump_revision(current_rev, nodepaths)
	            break

	def usage():
	    """Print the command-line help text to stdout, then exit with status 0."""
	    print('Usage: ' + sys.argv[0] + ' [DUMPFILE]')
	    print('')
	    print('Reads a Subversion dumpfile from DUMPFILE (or, if not provided,')
	    print('from stdin) and normalizes the metadata contained therein,')
	    print('printing summarized and sorted information. This is useful for')
	    print('generating data about dumpfiles in a diffable fashion.')
	    sys.exit(0)

	def main():
	    """Command-line entry point.

	    With no argument the dumpfile is read from stdin.  With an argument,
	    '--help' prints the usage text and exits; anything else is taken as
	    the path of the dumpfile to read.
	    """
	    if len(sys.argv) > 1:
	        if sys.argv[1] == '--help':
	            usage()  # exits the process, so nothing below runs
	        # Binary mode keeps fp.read(n) counting bytes, which is what the
	        # content lengths in the dump headers are skipped by.  The with
	        # block closes the file even if parsing raises.
	        with open(sys.argv[1], 'rb') as fp:
	            parse_file(fp)
	    else:
	        parse_file(sys.stdin)


	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == '__main__':
	    main()