tools/bdb/svn-bdb-view.py - subversion - Git at Google

 #!/usr/bin/env python
 #
 # This is a pretty-printer for subversion BDB repository databases.
 #

 import sys, os, re, codecs, textwrap
 import skel, svnfs

 # Parse arguments: exactly one positional argument (the repository path)
 # is accepted; anything else prints the usage line and exits with status 1.
 if len(sys.argv) != 2:
   sys.stderr.write("Usage: %s <svn-repository>\n" % sys.argv[0])
   sys.exit(1)

 # The database home is the 'db' directory inside the given repository.
 dbhome = os.path.join(sys.argv[1], 'db')
 if not os.path.exists(dbhome):
   sys.stderr.write("%s: '%s' is not a valid svn repository\n" %
       (sys.argv[0], dbhome))
   sys.exit(1)

 # Helper Classes
 class RepositoryProblem(Exception):
   """Raised by ok() when a repository consistency check fails."""

 # Helper Functions
 def ok(bool, comment):
   """Raise RepositoryProblem(comment) unless 'bool' is true.

   bool    -- outcome of the consistency check (any truth value)
   comment -- description of the check; becomes the exception message

   Returns None when the check passed.
   """
   if not bool:
     # The message is the caller-supplied description of the failed check.
     raise RepositoryProblem(comment)

 # Helper Data
 # Maps a change record's kind to the one-letter code printed by am_changes().
 opmap = dict(
   add='A',
   modify='M',
   delete='D',
   replace='R',
   reset='X',
 )

 # Analysis Modules
 def am_uuid(ctx):
   "uuids"
   # The one-line docstring above is printed by main() as the module title,
   # so it must stay as-is.
   #
   # Checks that the uuids table holds exactly one record under key 1 and
   # that its value looks like a lower-case hexadecimal UUID, then prints it.
   # Raises RepositoryProblem (through ok()) when either check fails.
   uuid_format = \
       r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
   db = ctx.uuids_db
   ok(db.keys() == [1], 'uuid Table Structure')
   ok(re.match(uuid_format, db[1]), 'UUID format')
   # Single-argument parenthesised form prints identically under the
   # Python 2 print statement and the Python 3 print function.
   print("Repos UUID: %s" % db[1])

 def am_revisions(ctx):
   "revisions"
   # Walks the revisions table in cursor order, printing one line per
   # record, and stores the txn -> revision-number mapping on ctx.txn2rev
   # (am_txns reads it later).  Raises RepositoryProblem (through ok())
   # when two revisions name the same transaction.
   cur = ctx.revs_db.cursor()
   try:
     rec = cur.first()
     ctx.txn2rev = txn2rev = {}
     while rec:
       rev = skel.Rev(rec[1])
       # The revision number is the record key minus one.
       revnum = rec[0] - 1
       if ctx.txns_db.has_key(rev.txn):
         missing = ""
       else:
         missing = "*** MISSING TXN ***"
       print("r%d: txn %s%s" % (revnum, rev.txn, missing))
       ok(rev.txn not in txn2rev, 'Multiple revs bound to same txn')
       txn2rev[rev.txn] = revnum
       rec = cur.next()
   finally:
     cur.close()

 def am_changes(ctx):
   "changes"
   # Prints every record of the changes table, grouped by transaction id.
   # The table is scanned once per key length (1, 2, 3, ...) so that shorter
   # txn ids are listed before longer ones; scanning stops after the first
   # length for which no record was found.
   cur = ctx.changes_db.cursor()
   try:
     current_txnid_len = 0
     maximum_txnid_len = 0
     while current_txnid_len <= maximum_txnid_len:
       current_txnid_len += 1
       prevtxn = None
       rec = cur.first()
       while rec:
         txnid = rec[0]
         if len(txnid) == current_txnid_len:
           ch = skel.Change(rec[1])
           lead = "txn %s:" % txnid
           if prevtxn == txnid:
             # Same txn as the previous line: blank out the repeated prefix.
             lead = " " * len(lead)
           textflag = propflag = "-"
           if ch.textmod:
             textflag = "T"
           if ch.propmod:
             propflag = "P"
           if ctx.nodes_db.has_key(ch.node):
             missing = ""
           else:
             missing = "*** MISSING NODE ***"
           print("%s %s %s %s %s %s%s" % (lead, opmap[ch.kind], ch.path,
               ch.node, textflag, propflag, missing))
           prevtxn = txnid
           # Only keys of the current length reach this point, so the
           # maximum grows by at most one per pass.
           if len(txnid) > maximum_txnid_len:
             maximum_txnid_len = len(txnid)
         rec = cur.next()
   finally:
     cur.close()

 def am_copies(ctx):
   "copies"
   # Prints the copies table: first its 'next-key' record, then one line per
   # copy with the creation path of the destination node (or a marker when
   # that node is absent from the nodes table).
   kindcode = {'copy':'C','soft-copy':'S'}
   cur = ctx.copies_db.cursor()
   try:
     print("next-key: %s" % ctx.copies_db['next-key'])
     rec = cur.first()
     while rec:
       if rec[0] != 'next-key':
         cp = skel.Copy(rec[1])
         destnode = ctx.nodes_db.get(cp.destnode)
         if destnode:
           destpath = skel.Node(destnode).createpath
         else:
           destpath = "*** MISSING NODE ***"
         print("cpy %s: %s %s @txn %s to %s (%s)" % (rec[0],
             kindcode[cp.kind], cp.srcpath or "-",
             cp.srctxn or "-", cp.destnode, destpath))
       rec = cur.next()
   finally:
     cur.close()

 def am_txns(ctx):
   "transactions"
   # Prints the transactions table, shortest txn ids first (one scan per key
   # length, stopping at the first length with no records).  For committed
   # transactions the revision is cross-checked against ctx.txn2rev, which
   # am_revisions must have populated beforehand.
   cur = ctx.txns_db.cursor()
   try:
     print("next-key: %s" % ctx.txns_db['next-key'])
     length = 1
     found_some = True
     while found_some:
       found_some = False
       rec = cur.first()
       while rec:
         if rec[0] != 'next-key' and len(rec[0]) == length:
           found_some = True
           txn = skel.Txn(rec[1])
           if txn.kind == "committed":
             label = "r%s" % txn.rev
             # .get(): a committed txn that no revision refers to is itself
             # a consistency failure, so report it through ok() instead of
             # dying with a bare KeyError.
             ok(ctx.txn2rev.get(rec[0]) == int(txn.rev), 'Txn->rev not <-txn')
           else:
             label = "%s based-on %s" % (txn.kind, txn.basenode)
           # proplist length is halved to get the count printed as "props";
           # floor division keeps the result an integer.
           print("txn %s: %s root-node %s props %d copies %s" % (rec[0],
               label, txn.rootnode, len(txn.proplist) // 2,
               ",".join(txn.copies)))
         rec = cur.next()
       length += 1
   finally:
     cur.close()

 def _rep_kind(ctx, rep_id):
   # Return the one-letter kind of representation 'rep_id', suffixed with a
   # BAD marker when am_reps recorded it in ctx.bad_reps, or a MISSING
   # marker when the lookup raises KeyError.
   try:
     rep = skel.Rep(ctx.reps_db[rep_id])
     kind = {"fulltext":"F", "delta":"D"}[rep.kind]
   except KeyError:
     return "*** MISSING ***"
   if rep_id in ctx.bad_reps:
     kind += " *** BAD ***"
   return kind

 def am_nodes(ctx):
   "nodes"
   # Prints the nodes table: its 'next-key' record, then one line per node
   # revision.  Requires ctx.bad_reps, which am_reps sets up.
   cur = ctx.nodes_db.cursor()
   try:
     # This is the nodes table, so report the nodes table's own next-key.
     print("next-key: %s" % ctx.nodes_db['next-key'])
     data = {}
     rec = cur.first()
     while rec:
       if rec[0] != 'next-key':
         nd = skel.Node(rec[1])
         # Keys have three dot-separated fields; records are ordered by the
         # third field (right-justified so it sorts by length first) and
         # then by creation path.
         _nid, _cid, tid = rec[0].split(".")
         data[tid.rjust(20)+nd.createpath] = (rec[0], nd)
       rec = cur.next()
     sortkeys = list(data.keys())
     sortkeys.sort()
     kindcode = {"file":"F", "dir":"D"}
     for sortkey in sortkeys:
       nodeid, nd = data[sortkey]
       prkind = drkind = " "
       if nd.proprep:
         prkind = _rep_kind(ctx, nd.proprep)
       if nd.datarep:
         drkind = _rep_kind(ctx, nd.datarep)
       stringdata = "%s: %s %s pred %s count %s prop %s %s data %s %s edit %s" \
           % (nodeid, kindcode[nd.kind], nd.createpath,
           nd.prednode or "-", nd.predcount, prkind, nd.proprep or "-",
           drkind, nd.datarep or "-", nd.editrep or "-")
       if nd.createpath == "/":
         # Blank line before each node created at "/".
         print("")
       print(stringdata)
   finally:
     cur.close()

 def get_string(ctx, id):
   # Return the whole string stored under 'id', or a marker text when the
   # lookup reports the string as not found.
   # NOTE(review): DbNotFoundError is neither defined nor imported in the
   # visible part of this file; if get_whole_string() ever raises, evaluating
   # the except clause would itself fail with NameError -- confirm the
   # intended exception class and where it should be imported from.
   try:
     whole = ctx.get_whole_string(id)
   except DbNotFoundError:
     whole = "*** MISSING STRING ***"
   return whole

 def am_reps(ctx):
   "representations"
   # Prints the representations table and records, in ctx.bad_reps, the key
   # of every representation that refers to a string or representation that
   # does not exist (am_nodes consults ctx.bad_reps afterwards).
   ctx.bad_reps = {}
   hexlify = codecs.getencoder('hex_codec')
   cur = ctx.reps_db.cursor()
   try:
     # This is the representations table, so report its own next-key.
     print("next-key: %s" % ctx.reps_db['next-key'])
     rec = cur.first()
     while rec:
       if rec[0] != 'next-key':
         rep = skel.Rep(rec[1])
         lead = "rep %s: txn %s: %s %s " % (rec[0], rep.txn, rep.cksumtype,
             hexlify(rep.cksum)[0])
         if rep.kind == "fulltext":
           note = ""
           if not ctx.strings_db.has_key(rep.str):
             note = " *MISS*"
             ctx.bad_reps[rec[0]] = None
           print(lead+("fulltext str %s%s" % (rep.str, note)))
           if ctx.verbose:
             print(textwrap.fill(get_string(ctx, rep.str),
                 initial_indent="  ", subsequent_indent="  ", width=78))
         elif rep.kind == "delta":
           if len(rep.windows) != 1:
             plural = "s"
           else:
             plural = ""
           print(lead+("delta of %s window%s" % (len(rep.windows), plural)))
           for window in rep.windows:
             noterep = notestr = ""
             if not ctx.reps_db.has_key(window.vs_rep):
               noterep = " *MISS*"
               ctx.bad_reps[rec[0]] = None
             if not ctx.strings_db.has_key(window.str):
               notestr = " *MISS*"
               ctx.bad_reps[rec[0]] = None
             print("\toff %s len %s vs-rep %s%s str %s%s" % (window.offset,
                 window.size, window.vs_rep, noterep, window.str, notestr))
         else:
           print(lead+"*** UNKNOWN REPRESENTATION TYPE ***")
       rec = cur.next()
   finally:
     cur.close()


 def am_stringsize(ctx):
   "string size"
   # Sums the lengths of all values in the strings table and prints the
   # total three ways: as-is, divided by 1024, and divided by 1024 twice.
   # Does nothing unless ctx.verbose is set.
   if not ctx.verbose:
     return
   cur = ctx.strings_db.cursor()
   try:
     size = 0
     rec = cur.first()
     while rec:
       # A false value (e.g. None) counts as an empty string.
       size += len(rec[1] or "")
       rec = cur.next()
     # Explicit %s formatting gives the same space-separated str() output as
     # the comma form of the Python 2 print statement.
     print("%s %s %s" % (size, size/1024.0, size/1024.0/1024.0))
   finally:
     cur.close()

 # Analysis modules in the order main() runs them.
 modules = (
     am_uuid,
     am_revisions,   # sets ctx.txn2rev, which am_txns reads
     am_changes,
     am_copies,
     am_txns,
     am_reps,        # sets ctx.bad_reps, which am_nodes reads
     am_nodes,
     # Takes too long: am_stringsize,
     )

 def main():
   """Open the repository databases read-only and run every analysis module.

   Prints a header, then for each entry of 'modules' a "MODULE:" title (the
   module function's docstring), the module's own output and a blank line.
   The context is closed even when a module raises.
   """
   print("Repository View for '%s'" % dbhome)
   print("")
   ctx = svnfs.Ctx(dbhome, readonly=1)
   # Stash process state in a library data structure. Yuck!
   ctx.verbose = 0
   try:
     for am in modules:
       print("MODULE: %s" % am.__doc__)
       am(ctx)
       print("")
   finally:
     ctx.close()

 if __name__ == '__main__':
   main()
	#!/usr/bin/env python
	#
	# This is a pretty-printer for subversion BDB repository databases.
	#

	import sys, os, re, codecs, textwrap
	import skel, svnfs

	# Parse arguments: exactly one positional argument (the repository path)
	# is accepted; anything else prints the usage line and exits with status 1.
	if len(sys.argv) != 2:
	  sys.stderr.write("Usage: %s <svn-repository>\n" % sys.argv[0])
	  sys.exit(1)

	# The database home is the 'db' directory inside the given repository.
	dbhome = os.path.join(sys.argv[1], 'db')
	if not os.path.exists(dbhome):
	  sys.stderr.write("%s: '%s' is not a valid svn repository\n" %
	      (sys.argv[0], dbhome))
	  sys.exit(1)

	# Helper Classes
	class RepositoryProblem(Exception):
	  """Raised by ok() when a repository consistency check fails."""

	# Helper Functions
	def ok(bool, comment):
	  """Raise RepositoryProblem(comment) unless 'bool' is true.

	  bool    -- outcome of the consistency check (any truth value)
	  comment -- description of the check; becomes the exception message

	  Returns None when the check passed.
	  """
	  if not bool:
	    # The message is the caller-supplied description of the failed check.
	    raise RepositoryProblem(comment)

	# Helper Data
	# Maps a change record's kind to the one-letter code printed by am_changes().
	opmap = dict(
	  add='A',
	  modify='M',
	  delete='D',
	  replace='R',
	  reset='X',
	)

	# Analysis Modules
	def am_uuid(ctx):
	  "uuids"
	  # The one-line docstring above is printed by main() as the module title,
	  # so it must stay as-is.
	  #
	  # Checks that the uuids table holds exactly one record under key 1 and
	  # that its value looks like a lower-case hexadecimal UUID, then prints it.
	  # Raises RepositoryProblem (through ok()) when either check fails.
	  uuid_format = \
	      r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
	  db = ctx.uuids_db
	  ok(db.keys() == [1], 'uuid Table Structure')
	  ok(re.match(uuid_format, db[1]), 'UUID format')
	  print("Repos UUID: %s" % db[1])

	def am_revisions(ctx):
	  "revisions"
	  # Walks the revisions table in cursor order, printing one line per
	  # record, and stores the txn -> revision-number mapping on ctx.txn2rev
	  # (am_txns reads it later).  Raises RepositoryProblem (through ok())
	  # when two revisions name the same transaction.
	  cur = ctx.revs_db.cursor()
	  try:
	    rec = cur.first()
	    ctx.txn2rev = txn2rev = {}
	    while rec:
	      rev = skel.Rev(rec[1])
	      # The revision number is the record key minus one.
	      revnum = rec[0] - 1
	      if ctx.txns_db.has_key(rev.txn):
	        missing = ""
	      else:
	        missing = "* MISSING TXN *"
	      print("r%d: txn %s%s" % (revnum, rev.txn, missing))
	      ok(rev.txn not in txn2rev, 'Multiple revs bound to same txn')
	      txn2rev[rev.txn] = revnum
	      rec = cur.next()
	  finally:
	    cur.close()

	def am_changes(ctx):
	  "changes"
	  # Prints every record of the changes table, grouped by transaction id.
	  # The table is scanned once per key length (1, 2, 3, ...) so that shorter
	  # txn ids are listed before longer ones; scanning stops after the first
	  # length for which no record was found.
	  cur = ctx.changes_db.cursor()
	  try:
	    current_txnid_len = 0
	    maximum_txnid_len = 0
	    while current_txnid_len <= maximum_txnid_len:
	      current_txnid_len += 1
	      prevtxn = None
	      rec = cur.first()
	      while rec:
	        txnid = rec[0]
	        if len(txnid) == current_txnid_len:
	          ch = skel.Change(rec[1])
	          lead = "txn %s:" % txnid
	          if prevtxn == txnid:
	            # Same txn as the previous line: blank out the repeated prefix.
	            lead = " " * len(lead)
	          textflag = propflag = "-"
	          if ch.textmod:
	            textflag = "T"
	          if ch.propmod:
	            propflag = "P"
	          if ctx.nodes_db.has_key(ch.node):
	            missing = ""
	          else:
	            missing = "* MISSING NODE *"
	          print("%s %s %s %s %s %s%s" % (lead, opmap[ch.kind], ch.path,
	              ch.node, textflag, propflag, missing))
	          prevtxn = txnid
	          # Only keys of the current length reach this point, so the
	          # maximum grows by at most one per pass.
	          if len(txnid) > maximum_txnid_len:
	            maximum_txnid_len = len(txnid)
	        rec = cur.next()
	  finally:
	    cur.close()

	def am_copies(ctx):
	  "copies"
	  # Prints the copies table: first its 'next-key' record, then one line per
	  # copy with the creation path of the destination node (or a marker when
	  # that node is absent from the nodes table).
	  kindcode = {'copy':'C','soft-copy':'S'}
	  cur = ctx.copies_db.cursor()
	  try:
	    print("next-key: %s" % ctx.copies_db['next-key'])
	    rec = cur.first()
	    while rec:
	      if rec[0] != 'next-key':
	        cp = skel.Copy(rec[1])
	        destnode = ctx.nodes_db.get(cp.destnode)
	        if destnode:
	          destpath = skel.Node(destnode).createpath
	        else:
	          destpath = "* MISSING NODE *"
	        print("cpy %s: %s %s @txn %s to %s (%s)" % (rec[0],
	            kindcode[cp.kind], cp.srcpath or "-",
	            cp.srctxn or "-", cp.destnode, destpath))
	      rec = cur.next()
	  finally:
	    cur.close()

	def am_txns(ctx):
	  "transactions"
	  # Prints the transactions table, shortest txn ids first (one scan per key
	  # length, stopping at the first length with no records).  For committed
	  # transactions the revision is cross-checked against ctx.txn2rev, which
	  # am_revisions must have populated beforehand.
	  cur = ctx.txns_db.cursor()
	  try:
	    print("next-key: %s" % ctx.txns_db['next-key'])
	    length = 1
	    found_some = True
	    while found_some:
	      found_some = False
	      rec = cur.first()
	      while rec:
	        if rec[0] != 'next-key' and len(rec[0]) == length:
	          found_some = True
	          txn = skel.Txn(rec[1])
	          if txn.kind == "committed":
	            label = "r%s" % txn.rev
	            # .get(): a committed txn that no revision refers to is itself
	            # a consistency failure, so report it through ok() instead of
	            # dying with a bare KeyError.
	            ok(ctx.txn2rev.get(rec[0]) == int(txn.rev), 'Txn->rev not <-txn')
	          else:
	            label = "%s based-on %s" % (txn.kind, txn.basenode)
	          # proplist length is halved to get the count printed as "props";
	          # floor division keeps the result an integer.
	          print("txn %s: %s root-node %s props %d copies %s" % (rec[0],
	              label, txn.rootnode, len(txn.proplist) // 2,
	              ",".join(txn.copies)))
	        rec = cur.next()
	      length += 1
	  finally:
	    cur.close()

	def _rep_kind(ctx, rep_id):
	  # Return the one-letter kind of representation 'rep_id', suffixed with a
	  # BAD marker when am_reps recorded it in ctx.bad_reps, or a MISSING
	  # marker when the lookup raises KeyError.
	  try:
	    rep = skel.Rep(ctx.reps_db[rep_id])
	    kind = {"fulltext":"F", "delta":"D"}[rep.kind]
	  except KeyError:
	    return "* MISSING *"
	  if rep_id in ctx.bad_reps:
	    kind += " * BAD *"
	  return kind

	def am_nodes(ctx):
	  "nodes"
	  # Prints the nodes table: its 'next-key' record, then one line per node
	  # revision.  Requires ctx.bad_reps, which am_reps sets up.
	  cur = ctx.nodes_db.cursor()
	  try:
	    # This is the nodes table, so report the nodes table's own next-key.
	    print("next-key: %s" % ctx.nodes_db['next-key'])
	    data = {}
	    rec = cur.first()
	    while rec:
	      if rec[0] != 'next-key':
	        nd = skel.Node(rec[1])
	        # Keys have three dot-separated fields; records are ordered by the
	        # third field (right-justified so it sorts by length first) and
	        # then by creation path.
	        _nid, _cid, tid = rec[0].split(".")
	        data[tid.rjust(20)+nd.createpath] = (rec[0], nd)
	      rec = cur.next()
	    sortkeys = list(data.keys())
	    sortkeys.sort()
	    kindcode = {"file":"F", "dir":"D"}
	    for sortkey in sortkeys:
	      nodeid, nd = data[sortkey]
	      prkind = drkind = " "
	      if nd.proprep:
	        prkind = _rep_kind(ctx, nd.proprep)
	      if nd.datarep:
	        drkind = _rep_kind(ctx, nd.datarep)
	      stringdata = "%s: %s %s pred %s count %s prop %s %s data %s %s edit %s" \
	          % (nodeid, kindcode[nd.kind], nd.createpath,
	          nd.prednode or "-", nd.predcount, prkind, nd.proprep or "-",
	          drkind, nd.datarep or "-", nd.editrep or "-")
	      if nd.createpath == "/":
	        # Blank line before each node created at "/".
	        print("")
	      print(stringdata)
	  finally:
	    cur.close()

	def get_string(ctx, id):
	  # Return the whole string stored under 'id', or a marker text when the
	  # lookup reports the string as not found.
	  # NOTE(review): DbNotFoundError is neither defined nor imported in the
	  # visible part of this file; if get_whole_string() ever raises, evaluating
	  # the except clause would itself fail with NameError -- confirm the
	  # intended exception class and where it should be imported from.
	  try:
	    return ctx.get_whole_string(id)
	  except DbNotFoundError:
	    return "* MISSING STRING *"

	def am_reps(ctx):
	  "representations"
	  # Prints the representations table and records, in ctx.bad_reps, the key
	  # of every representation that refers to a string or representation that
	  # does not exist (am_nodes consults ctx.bad_reps afterwards).
	  ctx.bad_reps = {}
	  hexlify = codecs.getencoder('hex_codec')
	  cur = ctx.reps_db.cursor()
	  try:
	    # This is the representations table, so report its own next-key.
	    print("next-key: %s" % ctx.reps_db['next-key'])
	    rec = cur.first()
	    while rec:
	      if rec[0] != 'next-key':
	        rep = skel.Rep(rec[1])
	        lead = "rep %s: txn %s: %s %s " % (rec[0], rep.txn, rep.cksumtype,
	            hexlify(rep.cksum)[0])
	        if rep.kind == "fulltext":
	          note = ""
	          if not ctx.strings_db.has_key(rep.str):
	            note = " MISS"
	            ctx.bad_reps[rec[0]] = None
	          print(lead+("fulltext str %s%s" % (rep.str, note)))
	          if ctx.verbose:
	            print(textwrap.fill(get_string(ctx, rep.str),
	                initial_indent=" ", subsequent_indent=" ", width=78))
	        elif rep.kind == "delta":
	          if len(rep.windows) != 1:
	            plural = "s"
	          else:
	            plural = ""
	          print(lead+("delta of %s window%s" % (len(rep.windows), plural)))
	          for window in rep.windows:
	            noterep = notestr = ""
	            if not ctx.reps_db.has_key(window.vs_rep):
	              noterep = " MISS"
	              ctx.bad_reps[rec[0]] = None
	            if not ctx.strings_db.has_key(window.str):
	              notestr = " MISS"
	              ctx.bad_reps[rec[0]] = None
	            print("\toff %s len %s vs-rep %s%s str %s%s" % (window.offset,
	                window.size, window.vs_rep, noterep, window.str, notestr))
	        else:
	          print(lead+"* UNKNOWN REPRESENTATION TYPE *")
	      rec = cur.next()
	  finally:
	    cur.close()


	def am_stringsize(ctx):
	  "string size"
	  # Sums the lengths of all values in the strings table and prints the
	  # total three ways: as-is, divided by 1024, and divided by 1024 twice.
	  # Does nothing unless ctx.verbose is set.
	  if not ctx.verbose:
	    return
	  cur = ctx.strings_db.cursor()
	  try:
	    size = 0
	    rec = cur.first()
	    while rec:
	      # A false value (e.g. None) counts as an empty string.
	      size += len(rec[1] or "")
	      rec = cur.next()
	    # Explicit %s formatting gives the same space-separated str() output as
	    # the comma form of the Python 2 print statement.
	    print("%s %s %s" % (size, size/1024.0, size/1024.0/1024.0))
	  finally:
	    cur.close()

	# Analysis modules in the order main() runs them.
	modules = (
	    am_uuid,
	    am_revisions,   # sets ctx.txn2rev, which am_txns reads
	    am_changes,
	    am_copies,
	    am_txns,
	    am_reps,        # sets ctx.bad_reps, which am_nodes reads
	    am_nodes,
	    # Takes too long: am_stringsize,
	    )

	def main():
	  """Open the repository databases read-only and run every analysis module.

	  Prints a header, then for each entry of 'modules' a "MODULE:" title (the
	  module function's docstring), the module's own output and a blank line.
	  The context is closed even when a module raises.
	  """
	  print("Repository View for '%s'" % dbhome)
	  print("")
	  ctx = svnfs.Ctx(dbhome, readonly=1)
	  # Stash process state in a library data structure. Yuck!
	  ctx.verbose = 0
	  try:
	    for am in modules:
	      print("MODULE: %s" % am.__doc__)
	      am(ctx)
	      print("")
	  finally:
	    ctx.close()

	if __name__ == '__main__':
	  main()