contrib/server-side/fsfsfixer/fixer/find_good_id.py - subversion - Git at Google

 #!/usr/bin/env python

 # Help text written to stderr when the script is started with the wrong
 # number of command-line arguments.  "$0" stands for the script name; it is
 # printed literally, nothing substitutes it.
 usage = """
 Print the correct FSFS node-rev id, given one that is correct except for
 its byte-offset part.
 Usage: $0 REPO-DIR FSFS-ID-WITH-BAD-OFFSET
 Example:
   Result of running 'svnadmin verify':
     svnadmin: Corrupt node-revision '5-12302.1-12953.r12953/29475'
   Invocation of this script:
     $ $0 svn-repo 5-12302.1-12953.r12953/29475
   Output of this script:
     5-12302.1-12953.r12953/29255
 """

 import os, sys

 class FixError(Exception):
   """Raised whenever the repository cannot be repaired, whatever the cause."""

 def parse_id(id):
   """Split a committed FSFS node-revision id into its parts.

      ID is of the form "NODEREV/OFFSET", and NODEREV is of the form
      "SOMETHING.rREV".  Return the tuple (NODEREV, REV, OFFSET); all
      three are strings.

      Raise ValueError if ID does not contain exactly one '/', or if
      NODEREV contains no ".r" marker.
   """
   noderev, offset = id.split('/')
   # Split at the LAST ".r" only.  SOMETHING is made of alphanumeric node
   # and copy ids which may themselves start with the letter "r" (for
   # example "5-12302.r-5.r12953"), so ".r" can occur more than once.
   _, rev = noderev.rsplit('.r', 1)
   return noderev, rev, offset

 def _revs_per_shard(repo_dir):
   """Return the shard size declared by a "layout sharded N" line in
      REPO_DIR/db/format, or 0 if there is no such line or the file
      cannot be read."""
   try:
     with open(os.path.join(repo_dir, 'db', 'format')) as format_file:
       for line in format_file:
         fields = line.split()
         if (len(fields) == 3 and fields[:2] == ['layout', 'sharded']
             and fields[2].isdigit()):
           return int(fields[2])
   except (IOError, OSError):
     # No readable format file: fall back to the linear layout, which is
     # what this function assumed before shards were supported.
     pass
   return 0

 def rev_file_path(repo_dir, rev):
   """Return the path of the rev file of revision REV (a string of decimal
      digits) in the repository at REPO_DIR.

      If REPO_DIR/db/format declares "layout sharded N", the result is
      db/revs/<REV // N>/<REV>; otherwise it is db/revs/<REV>.
   """
   # TODO: support packed shards
   revs_dir = os.path.join(repo_dir, 'db', 'revs')
   shard_size = _revs_per_shard(repo_dir)
   if shard_size and rev.isdigit():
     return os.path.join(revs_dir, str(int(rev) // shard_size), rev)
   return os.path.join(revs_dir, rev)

 def rev_file_indexes(repo_dir, rev):
   """Scan the rev file of revision REV in the repo at REPO_DIR.

      Return (ids, texts), where IDS is a dictionary of all node-rev ids
      defined in revision REV, in the form {noderev: full_id}, and TEXTS is
      a list of (offset, size, expanded-size, csum [,sha1-csum, uniquifier])
      tuples of strings taken from all the "text: REV ..." representation
      lines in revision REV.

      Here, NODEREV is the node-revision id minus the /offset part, and
      FULL_ID is the full node-revision id (including the /offset part).

      The scan is purely line-based, so file content that happens to look
      like an "id: " or "text: " header line is picked up as well.
   """
   id_prefix = 'id: '
   text_prefix = 'text: ' + rev + ' '  # also 'props:' lines?
   ids = {}
   texts = []
   # Read in binary mode so that no newline translation or text decoding
   # is applied to the non-text data stored between the header lines; the
   # 'with' block closes the file even if parsing fails part-way.
   with open(rev_file_path(repo_dir, rev), 'rb') as rev_file:
     for raw_line in rev_file:
       if isinstance(raw_line, str):
         line = raw_line
       else:
         # Python 3 yields bytes.  Latin-1 maps every byte to exactly one
         # character, so this cannot fail on arbitrary data.
         line = raw_line.decode('latin-1')
       if line.startswith(id_prefix):
         # Drop only the leading prefix, not later occurrences of it.
         full_id = line[len(id_prefix):].rstrip()
         id_noderev, id_rev, _ = parse_id(full_id)
         assert id_rev == rev
         ids[id_noderev] = full_id
       if line.startswith(text_prefix):
         fields = line.split()
         texts.append(tuple(fields[2:]))
   return ids, texts

 def find_good_id(repo_dir, bad_id):
   """Look up the full node-rev id whose NODEREV part equals that of BAD_ID.

      BAD_ID is a node-rev id that is right except for its byte-offset
      part.  The revision file of the repository at REPO_DIR is indexed and
      the id recorded there, with the correct offset, is returned.  Raise
      FixError if the revision defines no such node-rev.

      ### TODO: Parsing of the rev file should skip over node-content data
          when searching for a line matching "id: <id>", to avoid the
          possibility of a false match.
   """
   # The offset of BAD_ID is wrong by definition, so it is ignored.
   noderev, rev, _ = parse_id(bad_id)
   ids = rev_file_indexes(repo_dir, rev)[0]

   if noderev in ids:
     return ids[noderev]
   raise FixError("NodeRev Id '" + noderev + "' not found in r" + rev)

 def find_good_rep_header(repo_dir, rev, size):
   """Return the offset of the one "text:" representation in revision REV
      of the repo at REPO_DIR whose size field equals SIZE (a string).

      Raise FixError unless exactly one representation matches."""
   texts = rev_file_indexes(repo_dir, rev)[1]
   # Each entry is (offset, size, ...); collect the offset of every match.
   offsets = [fields[0] for fields in texts if fields[1] == size]
   if len(offsets) != 1:
     raise FixError("%d matches for r%s, size %s" % (len(offsets), rev, size))
   return offsets[0]


 if __name__ == '__main__':

   # Alternative mode, selected by three arguments: REPO-DIR REV SIZE.
   # Prints the offset of the representation of that size in that revision.
   if len(sys.argv) == 4:
     repo_dir = sys.argv[1]
     rev = sys.argv[2]
     size = sys.argv[3]
     # A single pre-formatted argument gives the same output whether
     # 'print' is a statement (Python 2) or a function (Python 3).
     print("Good offset: %s" % find_good_rep_header(repo_dir, rev, size))
     sys.exit(0)

   if len(sys.argv) != 3:
     sys.stderr.write(usage + "\n")
     sys.exit(1)

   repo_dir = sys.argv[1]
   bad_id = sys.argv[2]

   good_id = find_good_id(repo_dir, bad_id)

   # Replacement ID must be the same length, otherwise I don't know how to
   # reconstruct the file so as to preserve all offsets.
   # ### TODO: This check should be in the caller rather than here.
   if len(good_id) != len(bad_id):
     sys.stderr.write("warning: the good ID has a different length: " +
                      "bad id '" + bad_id + "', good id '" + good_id + "'\n")

   print(good_id)
	#!/usr/bin/env python

	# Help text written to stderr when the script is started with the wrong
	# number of command-line arguments.  "$0" stands for the script name; it is
	# printed literally, nothing substitutes it.
	usage = """
	Print the correct FSFS node-rev id, given one that is correct except for
	its byte-offset part.
	Usage: $0 REPO-DIR FSFS-ID-WITH-BAD-OFFSET
	Example:
	Result of running 'svnadmin verify':
	svnadmin: Corrupt node-revision '5-12302.1-12953.r12953/29475'
	Invocation of this script:
	$ $0 svn-repo 5-12302.1-12953.r12953/29475
	Output of this script:
	5-12302.1-12953.r12953/29255
	"""

	import os, sys

	class FixError(Exception):
	  """Raised whenever the repository cannot be repaired, whatever the cause."""

	def parse_id(id):
	  """Split a committed FSFS node-revision id into its parts.

	     ID is of the form "NODEREV/OFFSET", and NODEREV is of the form
	     "SOMETHING.rREV".  Return the tuple (NODEREV, REV, OFFSET); all
	     three are strings.

	     Raise ValueError if ID does not contain exactly one '/', or if
	     NODEREV contains no ".r" marker.
	  """
	  noderev, offset = id.split('/')
	  # Split at the LAST ".r" only.  SOMETHING is made of alphanumeric node
	  # and copy ids which may themselves start with the letter "r" (for
	  # example "5-12302.r-5.r12953"), so ".r" can occur more than once.
	  _, rev = noderev.rsplit('.r', 1)
	  return noderev, rev, offset

	def _revs_per_shard(repo_dir):
	  """Return the shard size declared by a "layout sharded N" line in
	     REPO_DIR/db/format, or 0 if there is no such line or the file
	     cannot be read."""
	  try:
	    with open(os.path.join(repo_dir, 'db', 'format')) as format_file:
	      for line in format_file:
	        fields = line.split()
	        if (len(fields) == 3 and fields[:2] == ['layout', 'sharded']
	            and fields[2].isdigit()):
	          return int(fields[2])
	  except (IOError, OSError):
	    # No readable format file: fall back to the linear layout, which is
	    # what this function assumed before shards were supported.
	    pass
	  return 0

	def rev_file_path(repo_dir, rev):
	  """Return the path of the rev file of revision REV (a string of decimal
	     digits) in the repository at REPO_DIR.

	     If REPO_DIR/db/format declares "layout sharded N", the result is
	     db/revs/<REV // N>/<REV>; otherwise it is db/revs/<REV>.
	  """
	  # TODO: support packed shards
	  revs_dir = os.path.join(repo_dir, 'db', 'revs')
	  shard_size = _revs_per_shard(repo_dir)
	  if shard_size and rev.isdigit():
	    return os.path.join(revs_dir, str(int(rev) // shard_size), rev)
	  return os.path.join(revs_dir, rev)

	def rev_file_indexes(repo_dir, rev):
	  """Scan the rev file of revision REV in the repo at REPO_DIR.

	     Return (ids, texts), where IDS is a dictionary of all node-rev ids
	     defined in revision REV, in the form {noderev: full_id}, and TEXTS is
	     a list of (offset, size, expanded-size, csum [,sha1-csum, uniquifier])
	     tuples of strings taken from all the "text: REV ..." representation
	     lines in revision REV.

	     Here, NODEREV is the node-revision id minus the /offset part, and
	     FULL_ID is the full node-revision id (including the /offset part).

	     The scan is purely line-based, so file content that happens to look
	     like an "id: " or "text: " header line is picked up as well.
	  """
	  id_prefix = 'id: '
	  text_prefix = 'text: ' + rev + ' '  # also 'props:' lines?
	  ids = {}
	  texts = []
	  # Read in binary mode so that no newline translation or text decoding
	  # is applied to the non-text data stored between the header lines; the
	  # 'with' block closes the file even if parsing fails part-way.
	  with open(rev_file_path(repo_dir, rev), 'rb') as rev_file:
	    for raw_line in rev_file:
	      if isinstance(raw_line, str):
	        line = raw_line
	      else:
	        # Python 3 yields bytes.  Latin-1 maps every byte to exactly one
	        # character, so this cannot fail on arbitrary data.
	        line = raw_line.decode('latin-1')
	      if line.startswith(id_prefix):
	        # Drop only the leading prefix, not later occurrences of it.
	        full_id = line[len(id_prefix):].rstrip()
	        id_noderev, id_rev, _ = parse_id(full_id)
	        assert id_rev == rev
	        ids[id_noderev] = full_id
	      if line.startswith(text_prefix):
	        fields = line.split()
	        texts.append(tuple(fields[2:]))
	  return ids, texts

	def find_good_id(repo_dir, bad_id):
	  """Look up the full node-rev id whose NODEREV part equals that of BAD_ID.

	     BAD_ID is a node-rev id that is right except for its byte-offset
	     part.  The revision file of the repository at REPO_DIR is indexed and
	     the id recorded there, with the correct offset, is returned.  Raise
	     FixError if the revision defines no such node-rev.

	     ### TODO: Parsing of the rev file should skip over node-content data
	         when searching for a line matching "id: <id>", to avoid the
	         possibility of a false match.
	  """
	  # The offset of BAD_ID is wrong by definition, so it is ignored.
	  noderev, rev, _ = parse_id(bad_id)
	  ids, _ = rev_file_indexes(repo_dir, rev)

	  if noderev not in ids:
	    raise FixError("NodeRev Id '" + noderev + "' not found in r" + rev)
	  return ids[noderev]

	def find_good_rep_header(repo_dir, rev, size):
	  """Find a rep header that matches REV and SIZE (a string) in the repo
	     at REPO_DIR.  Return the correct offset.

	     Raise FixError unless exactly one "text:" representation matches."""
	  _, texts = rev_file_indexes(repo_dir, rev)
	  # Each entry is (offset, size, ...); collect the offset of every match.
	  offsets = [fields[0] for fields in texts if fields[1] == size]
	  if len(offsets) != 1:
	    raise FixError("%d matches for r%s, size %s" % (len(offsets), rev, size))
	  return offsets[0]


	if __name__ == '__main__':

	  # Alternative mode, selected by three arguments: REPO-DIR REV SIZE.
	  # Prints the offset of the representation of that size in that revision.
	  if len(sys.argv) == 4:
	    repo_dir = sys.argv[1]
	    rev = sys.argv[2]
	    size = sys.argv[3]
	    # A single pre-formatted argument gives the same output whether
	    # 'print' is a statement (Python 2) or a function (Python 3).
	    print("Good offset: %s" % find_good_rep_header(repo_dir, rev, size))
	    sys.exit(0)

	  if len(sys.argv) != 3:
	    sys.stderr.write(usage + "\n")
	    sys.exit(1)

	  repo_dir = sys.argv[1]
	  bad_id = sys.argv[2]

	  good_id = find_good_id(repo_dir, bad_id)

	  # Replacement ID must be the same length, otherwise I don't know how to
	  # reconstruct the file so as to preserve all offsets.
	  # ### TODO: This check should be in the caller rather than here.
	  if len(good_id) != len(bad_id):
	    sys.stderr.write("warning: the good ID has a different length: " +
	                     "bad id '" + bad_id + "', good id '" + good_id + "'\n")

	  print(good_id)