blob: badbe5332c34f9ae03efcd834403390b1dd4eafe [file] [log] [blame]
#!/usr/bin/env python
# Help text written to stderr when the script is started with an unexpected
# number of command-line arguments.  "$0" stands for the name of this script.
usage = """
Print the correct FSFS node-rev id, given one that is correct except for
its byte-offset part.
Usage: $0 REPO-DIR FSFS-ID-WITH-BAD-OFFSET
Example:
Result of running 'svnadmin verify':
svnadmin: Corrupt node-revision '5-12302.1-12953.r12953/29475'
Invocation of this script:
$ $0 svn-repo 5-12302.1-12953.r12953/29475
Output of this script:
5-12302.1-12953.r12953/29255
"""
import os, sys
class FixError(Exception):
    """Raised whenever the repository cannot be repaired, for any reason."""
def parse_id(id):
    """Split a full node-rev id into its parts.

    ID is of the form "NODEREV/OFFSET", and NODEREV is of the form
    "SOMETHING.rREV".

    Return the tuple (NODEREV, REV, OFFSET); every element is a string.

    Raise ValueError if ID does not contain exactly one '/', or if NODEREV
    contains no '.r' separator.
    """
    noderev, offset = id.split('/')
    # Split on the LAST '.r' only.  SOMETHING consists of dot-separated
    # components, and one of them may itself begin with the letter 'r'
    # (e.g. "3.r.r7"); a plain split('.r') would then yield more than two
    # pieces and the unpacking would fail.
    _, rev = noderev.rsplit('.r', 1)
    return noderev, rev, offset
def rev_file_path(repo_dir, rev):
    """Return the path of the revision file for REV in the repo at REPO_DIR.

    REV is the revision number as a decimal string.

    If REPO_DIR/db/format declares a "layout sharded N" option, the path is
    REPO_DIR/db/revs/SHARD/REV with SHARD = REV // N.  Otherwise (linear
    layout, or the format file cannot be read) it is REPO_DIR/db/revs/REV.

    Packed shards are not handled.
    """
    revs_dir = os.path.join(repo_dir, 'db', 'revs')
    shard_size = _revs_per_shard(repo_dir)
    # Only compute a shard for a purely numeric REV; anything else keeps
    # the plain linear path.
    if shard_size > 0 and rev.isdigit():
        return os.path.join(revs_dir, str(int(rev) // shard_size), rev)
    return os.path.join(revs_dir, rev)


def _revs_per_shard(repo_dir):
    """Return N from a "layout sharded N" line in db/format, else 0."""
    format_path = os.path.join(repo_dir, 'db', 'format')
    try:
        with open(format_path) as format_file:
            for line in format_file:
                words = line.split()
                if len(words) == 3 and words[:2] == ['layout', 'sharded']:
                    try:
                        return int(words[2])
                    except ValueError:
                        return 0
    except (IOError, OSError):
        # No readable format file: treat the repository as linear.
        return 0
    return 0
def rev_file_indexes(repo_dir, rev):
    """Index the node-rev ids and text representations of one revision.

    Return (IDS, TEXTS), where IDS is a dictionary of all node-rev ids
    defined in revision REV of the repo at REPO_DIR, in the form
    {noderev: full_id}, and TEXTS is a list of
    (offset, size, expanded-size, csum [,sha1-csum, uniquifier]) tuples
    taken from all the "text: REV ..." representation lines
    in revision REV.

    Here, NODEREV is the node-revision id minus the /offset part, and
    FULL_ID is the full node-revision id (including the /offset part).

    REV is the revision number as a string.

    Raise FixError if an "id: " line names a revision other than REV.
    """
    id_prefix = 'id: '
    text_prefix = 'text: ' + rev + ' '  # also 'props:' lines?
    ids = {}
    texts = []
    # Use a context manager so the revision file is closed even if parsing
    # one of its lines raises.
    with open(rev_file_path(repo_dir, rev)) as rev_file:
        for line in rev_file:
            if line.startswith(id_prefix):
                # Strip only the leading marker; the rest of the line is
                # the id itself.
                full_id = line[len(id_prefix):].rstrip()
                id_noderev, id_rev, _ = parse_id(full_id)
                # Explicit check rather than 'assert', which is skipped
                # when Python runs with -O.
                if id_rev != rev:
                    raise FixError("Id '" + full_id + "' found in r" + rev
                                   + " belongs to r" + id_rev)
                ids[id_noderev] = full_id
            elif line.startswith(text_prefix):
                fields = line.split()
                # fields[0] is 'text:' and fields[1] is REV; keep the rest.
                texts.append(tuple(fields[2:]))
    return ids, texts
def find_good_id(repo_dir, bad_id):
    """Look up the correct full node-rev id corresponding to BAD_ID.

    BAD_ID is a node-rev id whose byte-offset part may be wrong.  The
    revision file in the repository at REPO_DIR is scanned for the id with
    the same node-rev part, and that id (with its own offset) is returned.

    Raise FixError if no such id is found in the revision.

    ### TODO: Parsing of the rev file should skip over node-content data
    when searching for a line matching "id: <id>", to avoid the
    possibility of a false match.
    """
    # The offset carried by BAD_ID is the part under suspicion; ignore it.
    noderev, rev, _ = parse_id(bad_id)
    known_ids = rev_file_indexes(repo_dir, rev)[0]
    if noderev in known_ids:
        return known_ids[noderev]
    raise FixError("NodeRev Id '" + noderev + "' not found in r" + rev)
def find_good_rep_header(repo_dir, rev, size):
    """Return the offset of the one text representation in REV whose size
    field equals SIZE.

    REV and SIZE are strings.  Raise FixError unless exactly one
    representation in revision REV of the repo at REPO_DIR matches."""
    text_reps = rev_file_indexes(repo_dir, rev)[1]
    # Each entry is (offset, size, ...); collect offsets of matching sizes.
    offsets = [rep[0] for rep in text_reps if rep[1] == size]
    if len(offsets) != 1:
        raise FixError("%d matches for r%s, size %s"
                       % (len(offsets), rev, size))
    return offsets[0]
if __name__ == '__main__':
    # Output goes through sys.stdout/sys.stderr.write() instead of the
    # 'print' statement so the script runs under both Python 2 and 3.
    try:
        # Three arguments (REPO-DIR REV SIZE): report the offset of the
        # single text representation in REV that has the given size.
        if len(sys.argv) == 4:
            repo_dir, rev, size = sys.argv[1:4]
            sys.stdout.write("Good offset: %s\n"
                             % find_good_rep_header(repo_dir, rev, size))
            sys.exit(0)

        if len(sys.argv) != 3:
            # Substitute the "$0" placeholder with the name this script
            # was invoked as.
            sys.stderr.write(usage.replace('$0', sys.argv[0]) + "\n")
            sys.exit(1)

        # Two arguments (REPO-DIR BAD-ID): print the corrected id.
        repo_dir, bad_id = sys.argv[1:3]
        good_id = find_good_id(repo_dir, bad_id)

        # Replacement ID must be the same length, otherwise I don't know how
        # to reconstruct the file so as to preserve all offsets.
        # ### TODO: This check should be in the caller rather than here.
        if len(good_id) != len(bad_id):
            sys.stderr.write("warning: the good ID has a different length: "
                             + "bad id '" + bad_id
                             + "', good id '" + good_id + "'\n")

        sys.stdout.write(good_id + "\n")
    except FixError as err:
        # Report an unrepairable situation as a one-line error with a
        # non-zero exit status rather than a traceback.
        sys.stderr.write("error: %s\n" % err)
        sys.exit(1)