#!/usr/bin/env python
# Copyright (c) 2006, 2007 by John Szakmeister <john at szakmeister dot net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
import os
import optparse
import sys
import re
try:
import hashlib
md5_new = hashlib.md5
except ImportError:
import md5
md5_new = md5.new
# A handy constant for referring to the NULL digest (one that
# matches every digest).
NULL_DIGEST = '00000000000000000000000000000000'
class FsfsVerifyException(Exception):
pass
class PotentiallyFixableException(FsfsVerifyException):
'''Represents a class of problems that we may be able to fix.'''
def __init__(self, message, offset):
FsfsVerifyException.__init__(self, message)
self.offset = offset
class InvalidInstruction(PotentiallyFixableException):
pass
class InvalidCompressedStream(PotentiallyFixableException):
pass
class InvalidRepHeader(PotentiallyFixableException):
pass
class InvalidWindow(PotentiallyFixableException):
pass
class InvalidSvndiffVersion(FsfsVerifyException):
pass
class InvalidSvndiffHeader(FsfsVerifyException):
pass
class DataCorrupt(FsfsVerifyException):
pass
class NoMoreData(FsfsVerifyException):
pass
class CmdlineError(FsfsVerifyException):
pass
LOG_INSTRUCTIONS = 1
LOG_WINDOWS = 2
LOG_SVNDIFF = 4
LOG_MASK = LOG_SVNDIFF
def log(type, indent, format, *args):
if type & LOG_MASK:
indentStr = ' ' * indent
str = format % args
str = '\n'.join([indentStr + x for x in str.split('\n')])
print(str)
class ByteStream(object):
def __init__(self, fileobj):
self._f = fileobj
def readByte(self):
return ord(self._f.read(1))
def tell(self):
return self._f.tell()
def advance(self, numBytes):
self._f.seek(numBytes, 1)
def clone(self):
if hasattr(self._f, 'clone'):
newFileObj = self._f.clone()
else:
# We expect the file object to map to a real file
#
# Tried using dup(), but (at least on the mac), that ends up
# creating 2 handles to the same underlying os file object,
# instead of two independent file objects. So, we resort to
# an open call to create a new file object
newFileObj = open(self._f.name, 'rb')
newFileObj.seek(self._f.tell())
return ByteStream(newFileObj)
# The following let ByteStream behave as a file within the
# context of this script.
def read(self, *args, **kwargs):
return self._f.read(*args, **kwargs)
def seek(self, *args, **kwargs):
return self._f.seek(*args, **kwargs)
class ZlibByteStream(ByteStream):
def __init__(self, fileobj, length):
self._f = fileobj
# Store the number of bytes consumed thus far so we can compute an offset
self._numBytesConsumed = 0
self._startingOffset = self._f.tell()
    import zlib
self._z = zlib.decompressobj(15)
self._buffer = self._z.decompress(self._f.read(length))
self._origBufferLength = len(self._buffer)
def readByte(self):
if not self._buffer:
raise NoMoreData, "Unexpected end of data stream!"
byte = self._buffer[0]
self._buffer = self._buffer[1:]
return ord(byte)
def tell(self):
return self._origBufferLength - len(self._buffer)
  def advance(self, numBytes):
    while numBytes:
      self.readByte()
      numBytes -= 1
def clone(self):
if hasattr(self._f, 'clone'):
newFileObj = self._f.clone()
else:
newFileObj = open(self._f.name, 'rb')
newFileObj.seek(self._f.tell())
return ByteStream(newFileObj)
  # Unlike ByteStream, a ZlibByteStream cannot stand in for a file
  # elsewhere in this script: reading or seeking the decompressed
  # stream directly is not supported.
  def read(self, *args, **kwargs):
    raise NotImplementedError
  def seek(self, *args, **kwargs):
    raise NotImplementedError
def getVarint(byteStream):
  '''Grab a variable-sized int from a byte stream (meaning this function
  doesn't seek).'''
i = long(0)
while True:
byte = byteStream.readByte()
i = (i << 7) + (byte & 0x7F)
if byte & 0x80 == 0:
break
return i
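# For illustration, the varints used throughout an svndiff stream are stored
# most-significant group first, seven bits per byte, with the high bit set on
# every byte except the last.  For example, the value 300 (binary 100101100)
# is encoded as the two bytes 0x82 0x2C and decoded by the loop above as:
#   0x82 -> i = (0 << 7) + 0x02 = 2,   high bit set, keep reading
#   0x2C -> i = (2 << 7) + 0x2C = 300, high bit clear, done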
INSTR_COPY_SOURCE = 'copy-source'
INSTR_COPY_TARGET = 'copy-target'
INSTR_COPY_DATA = 'copy-data'
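# Each svndiff instruction begins with a single byte: the top two bits select
# the operation (0 = copy from source, 1 = copy from target, 2 = copy new
# data; 3 is invalid) and the low six bits hold the length.  If those six
# bits are zero, the real length follows as a varint.  As a small sketch of
# the decoding done below: the byte 0x05 means "copy 5 bytes from the source
# view" and is followed by a varint source offset, while 0x85 means "copy 5
# bytes from the new data section" and carries no offset.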
class SvndiffInstruction(object):
def __init__(self, byteStream):
self.instrOffset = byteStream.tell()
byte = byteStream.readByte()
instruction = (byte >> 6) & 3
length = byte & 0x3F
if instruction == 3:
raise InvalidInstruction(
"Invalid instruction found at offset %d (%02X)" % (self.instrOffset,
byte),
self.instrOffset)
if instruction == 0:
self.type = INSTR_COPY_SOURCE
elif instruction == 1:
self.type = INSTR_COPY_TARGET
else:
self.type = INSTR_COPY_DATA
if length == 0:
# Length is coded as a varint following the current byte
length = getVarint(byteStream)
self.length = length
if (self.type == INSTR_COPY_SOURCE) or (self.type == INSTR_COPY_TARGET):
self.offset = getVarint(byteStream)
if self.type == INSTR_COPY_SOURCE:
self.sourceOffset = self.offset
else:
self.sourceOffset = 0
if self.type == INSTR_COPY_TARGET:
self.targetOffset = self.offset
else:
self.targetOffset = 0
# Determine the number of bytes consumed in the source stream, target
# stream, and the data stream
if self.type == INSTR_COPY_SOURCE:
self.sourceLength = self.length
else:
self.sourceLength = 0
if self.type == INSTR_COPY_TARGET:
self.targetLength = self.length
else:
self.targetLength = 0
if self.type == INSTR_COPY_DATA:
self.dataLength = self.length
else:
self.dataLength = 0
self.instrLength = byteStream.tell() - self.instrOffset
def __repr__(self):
return '<SvndiffInstruction %s so:%d sl:%d to: %d tl:%d dl:%d (%d, %d)>' % (
self.type, self.sourceOffset, self.sourceLength, self.targetOffset,
self.targetLength, self.dataLength, self.instrOffset, self.instrLength)
class Window(object):
def __init__(self, byteStream, svndiffVersion):
if svndiffVersion not in [0, 1]:
raise InvalidSvndiffVersion, \
"Invalid svndiff version %d" % svndiffVersion
# Record the initial offset of the window
self.windowOffset = byteStream.tell()
try:
self.sourceOffset = getVarint(byteStream)
self.sourceLength = getVarint(byteStream)
self.targetLength = getVarint(byteStream)
self.instrLength = getVarint(byteStream)
self.dataLength = getVarint(byteStream)
self.windowHeaderLength = byteStream.tell() - self.windowOffset
self.windowLength = \
self.windowHeaderLength + self.instrLength + self.dataLength
# Store the byte stream, and clone it for use as a data stream.
self.instrByteStream = byteStream
self.dataByteStream = byteStream.clone()
# Advance the data stream past the instructions to the start of the data.
self.dataByteStream.advance(self.instrLength)
except:
e = InvalidWindow(
"The window header at offset %d appears to be corrupted" % \
(self.windowOffset),
self.windowOffset)
e.windowOffset = self.windowOffset
raise e
# In svndiff1, the instruction area starts with a varint-encoded length.
# If this length matches the one encoded in the header, then there is no
# compression. If it differs, then the stream is compressed with zlib.
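    # For example, if the header says the instruction area occupies 70 bytes
    # and the leading varint (one byte here) decodes to 64, then 70 - 1 != 64,
    # so the remaining bytes are treated as a zlib stream that inflates to 64
    # bytes; had the two lengths agreed (after subtracting the varint's own
    # size), the area would be stored uncompressed.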
self.origInstrStream = self.instrByteStream
self.origDataStream = self.dataByteStream
self.isInstrCompressed = False
self.isDataCompressed = False
self.compressedInstrLength = self.instrLength
self.compressedDataLength = self.dataLength
if svndiffVersion == 1:
try:
offset = self.instrByteStream.tell()
encodedInstrLength = getVarint(self.instrByteStream)
instrIntSize = self.instrByteStream.tell() - offset
offset = self.dataByteStream.tell()
encodedDataLength = getVarint(self.dataByteStream)
dataIntSize = self.dataByteStream.tell() - offset
self.instrLength = encodedInstrLength
self.dataLength = encodedDataLength
except:
e = InvalidWindow(
"The window header at offset %d appears to be corrupted" % \
(self.windowOffset),
self.windowOffset)
e.windowOffset = self.windowOffset
raise e
      # Now we need to determine whether the data and instructions are
      # compressed.  If they are, we zlib-decompress them by wrapping each
      # one in another stream that decompresses the data on the fly.
try:
offset = self.instrByteStream.tell()
if self.compressedInstrLength - instrIntSize != self.instrLength:
self.origInstrStream = self.instrByteStream
self.instrByteStream = ZlibByteStream(self.origInstrStream,
self.compressedInstrLength)
self.isInstrCompressed = True
except Exception as e:
new_e = InvalidCompressedStream(
"Invalid compressed instr stream at offset %d (%s)" % (offset,
str(e)),
offset)
new_e.windowOffset = self.windowOffset
raise new_e
try:
offset = self.dataByteStream.tell()
if self.compressedDataLength - dataIntSize != self.dataLength:
self.origDataStream = self.dataByteStream
self.dataByteStream = ZlibByteStream(self.origDataStream,
self.compressedDataLength)
self.isDataCompressed = True
except Exception as e:
new_e = InvalidCompressedStream(
"Invalid compressed data stream at offset %d (%s, %s)\n" % (
offset, str(e), repr(self)),
offset)
new_e.windowOffset = self.windowOffset
raise new_e
def verify(self):
expectedInstrLength = self.instrLength
expectedDataLength = self.dataLength
expectedTargetLength = self.targetLength
expectedSourceLength = self.sourceLength
computedInstrLength = 0
computedDataLength = 0
computedTargetLength = 0
computedSourceLength = 0
if expectedInstrLength == 0:
e = InvalidWindow(
"Corrupt window (at offset %d) has 0 instructions?!" % self.windowOffset,
self.windowOffset)
e.windowOffset = self.windowOffset
raise e
while computedInstrLength < expectedInstrLength:
try:
instr = SvndiffInstruction(self.instrByteStream)
except PotentiallyFixableException as e:
e.window = self
e.windowOffset = self.windowOffset
raise
log(LOG_INSTRUCTIONS, 4, repr(instr))
computedInstrLength += instr.instrLength
computedDataLength += instr.dataLength
computedSourceLength += instr.sourceLength
computedTargetLength += \
instr.targetLength + instr.sourceLength + instr.dataLength
if computedInstrLength != expectedInstrLength:
e = InvalidWindow(
"The number of instruction bytes consumed (%d) doesn't match the expected number (%d)" % \
(computedInstrLength, expectedInstrLength),
self.windowOffset)
e.windowOffset = self.windowOffset
raise e
if computedDataLength != expectedDataLength:
e = InvalidWindow(
"The number of data bytes consumed (%d) doesn't match the expected number (%d)" % \
(computedDataLength, expectedDataLength),
self.windowOffset)
e.windowOffset = self.windowOffset
raise e
if computedTargetLength != expectedTargetLength:
e = InvalidWindow(
"The number of target bytes consumed (%d) doesn't match the expected number (%d)" % \
(computedTargetLength, expectedTargetLength),
self.windowOffset)
e.windowOffset = self.windowOffset
raise e
    # It appears that the source length specified in the window isn't always
    # exactly equal to what gets consumed.  I suspect that's because the
    # algorithm uses different offsets within the window, and one
    # offset/length pair will reach the end of the window.  Since this hasn't
    # proven to be a clear indicator of corruption, the check is commented
    # out for now.
    #
#if computedSourceLength != expectedSourceLength:
# e = InvalidWindow(
# "The number of source bytes consumed (%d) doesn't match the expected number (%d)" % \
# (computedSourceLength, expectedSourceLength),
# self.windowOffset)
# e.windowOffset = self.windowOffset
# raise e
    # Advance past the data.  We do this with a seek because we may have
    # already read a few bytes from the stream while checking for compressed
    # data.
self.origInstrStream.seek(self.windowOffset + self.windowLength)
def __repr__(self):
if hasattr(self, 'compressedInstrLength'):
str = 'cil: %d cdl: %d ' % (self.compressedInstrLength,
self.compressedDataLength)
else:
str = ''
return "<Window wo:%d so:%d sl:%d tl:%d %sil:%d dl:%d whl:%d wl:%d>" % (
self.windowOffset, self.sourceOffset, self.sourceLength,
self.targetLength, str, self.instrLength, self.dataLength,
self.windowHeaderLength, self.windowLength)
class Svndiff(object):
def __init__(self, fileobj, length):
self._f = fileobj
self.startingOffset = self._f.tell()
header = self._f.read(4)
if len(header) != 4:
      raise EOFError, \
        "Unexpected end of file while reading svndiff header at offset %d" % \
        (self._f.tell())
if header[0:3] != 'SVN':
raise InvalidSvndiffHeader, "Invalid svndiff header at offset %d" % \
(self.startingOffset)
self.version = ord(header[3])
if self.version not in [0, 1]:
raise InvalidSvndiffVersion, "Invalid svndiff version %d" % self.version
self._length = length - 4
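  # A delta rep is a single svndiff stream: the 4-byte header parsed above
  # ('SVN' plus a version byte of 0 or 1) followed by consecutive windows,
  # which verify() below walks until self._length bytes have been consumed.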
def verify(self):
self._f.seek(self.startingOffset+4)
bs = ByteStream(self._f)
log(LOG_SVNDIFF, 2, "<Svndiff so: %d ver: %d>", self.startingOffset,
self.version)
try:
remaining = self._length
while remaining > 0:
w = Window(bs, self.version)
log(LOG_WINDOWS, 3, repr(w))
w.verify()
remaining -= w.windowLength
except PotentiallyFixableException as e:
e.svndiffStart = self.startingOffset
raise
def getDirHash(f):
l = f.readline()
if l != 'PLAIN\n':
raise ValueError, "Expected a PLAIN representation (%d)" % f.tell()
hash = {}
while True:
field = f.readline()[:-1]
if field == 'END':
break
assert(field[0] == 'K')
length = int(field.split(' ')[1])
field = f.readline()[:length]
value = f.readline()[:-1]
assert(value[0] == 'V')
length = int(value.split(' ')[1])
value = f.readline()[:length]
(type, txn) = value.split(' ')
hash[field] = [NodeType(type), NodeId(txn)]
return hash
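# getDirHash() expects a PLAIN directory representation laid out as
# alternating K/V records, e.g. (a hypothetical directory with one entry):
#   PLAIN
#   K 5
#   trunk
#   V 16
#   dir 0.0.r7/12345
#   END
# Each value names the entry's node type and node-rev id.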
class Rep(object):
def __init__(self, type, rev, offset, length, size, digest,
contentType, currentRev, noderev):
self.type = type
self.rev = int(rev)
self.offset = int(offset)
self.length = int(length)
self.size = int(size)
self.digest = digest.strip()
self.currentRev = currentRev
self.contentType = contentType
self.noderev = noderev
def __repr__(self):
if not self.contentType:
contentType = 'UNKNOWN'
else:
if self.contentType not in ['PLAIN', 'DELTA', None]:
contentType = 'INVALID'
else:
contentType = self.contentType
return '%s: %s %d %d %d %d %s' % (self.type, contentType, self.rev,
self.offset, self.length, self.size,
self.digest)
def verify(self, f, dumpInstructions, dumpWindows):
if self.contentType not in ['PLAIN', 'DELTA', None]:
e = InvalidRepHeader("Invalid rep header found at %d (%s)!" % \
(self.offset, self.contentType),
self.offset)
e.rep = self
e.noderev = self.noderev
raise e
    if self.rev != self.currentRev:
sys.stderr.write("Skipping text rep since it isn't present in the current rev\n")
return
f.seek(self.offset)
header = f.read(5)
if header != self.contentType:
raise FsfsVerifyException, \
"Invalid rep header found at %d (%s, %s)!" % (self.offset, header,
self.contentType)
if header == 'DELTA':
line = f.readline()
digest = None
# This should be the start of the svndiff stream
actual_start = f.tell()
try:
svndiff = Svndiff(f, self.length)
svndiff.verify()
except Exception as e:
e.rep = self
e.noderev = self.noderev
raise
if digest and (self.digest != NULL_DIGEST):
assert(digest == self.digest)
else:
if f.read(1) != '\n':
raise DataCorrupt, "Expected a '\\n' after PLAIN"
m = md5_new()
m.update(f.read(self.length))
if self.digest and self.digest != NULL_DIGEST \
and self.digest != m.hexdigest():
raise DataCorrupt, \
"PLAIN data is corrupted. Expected digest '%s', computed '%s'." % (
self.digest, m.hexdigest())
buf = f.read(6)
if buf != 'ENDREP':
raise DataCorrupt, "Terminating ENDREP missing! %r, %r" % (buf, self)
pass
class TextRep(Rep):
def __init__(self, rev, offset, length, size, digest,
contentType, currentRev, noderev, sha1=None, uniquifier=None):
super(TextRep,self).__init__('text', rev, offset, length, size,
digest, contentType, currentRev, noderev)
    self.sha1 = sha1
    self.uniquifier = uniquifier
class PropRep(Rep):
def __init__(self, rev, offset, length, size, digest,
contentType, currentRev, noderev):
super(PropRep,self).__init__('prop', rev, offset, length, size,
digest, contentType, currentRev, noderev)
class NodeId(object):
def __init__(self, nodeid):
(self.txn_name, offset) = nodeid.split('/')
self.offset = int(offset)
self.rev = int(self.txn_name.split('.')[2][1:])
def __repr__(self):
return self.txn_name + '/%d' % self.offset
def __eq__ (self, other):
s = self.txn_name + '/%d' % self.offset
if s == other:
return True
return False
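# A node-rev id as parsed here looks like "<node-id>.<copy-id>.r<rev>/<offset>",
# e.g. the made-up id "0.0.r7/12345": the portion before the '/' is kept as
# txn_name, the revision (7) is pulled out of its third dot-separated
# component, and the number after the '/' is the byte offset of the node-rev
# within that revision file.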
class NodeType(object):
def __init__(self, t):
if (t != 'file') and (t != 'dir'):
raise ValueError, 'Invalid Node type received: "%s"' % t
self.type = t
def __repr__(self):
return self.type[:]
class NodeRev(object):
def __init__(self, f, currentRev):
self.pred = None
self.text = None
self.props = None
self.cpath = None
self.copyroot = None
self.copyfrom = None
self.dir = []
self.nodeOffset = f.tell()
while True:
currentOffset = f.tell()
line = f.readline()
if line == '':
raise IOError, "Unexpected end of file at offset %d" % currentOffset
if line == '\n':
break
# break apart the line
try:
(field, value) = line.split(':', 1)
except:
print("line: '%s'" % repr(line))
print("Node revision offset: %i" % self.nodeOffset)
print("Current file position: %i" % f.tell())
raise
if field == "":
print("line: '%s'" % repr(line))
print("Node revision offset: %i" % self.nodeOffset)
print("Current file position: %i" % f.tell())
raise Exception("Empty field in node revision")
      # pull off the leading space and trailing newline
if len(value) < 2:
raise FsfsVerifyException("value needs to contain 2 or more bytes (%d)" % currentOffset)
value = value[1:-1]
assert value != ""
if field == 'id':
self.id = NodeId(value)
elif field == 'type':
self.type = NodeType(value)
elif field == 'pred':
self.pred = NodeId(value)
elif field == 'text':
values = value.split(' ')
rev = int(values[0])
offset = int(values[1])
length = int(values[2])
size = int(values[3])
digest = values[4]
if len(values) > 5:
sha1 = values[5]
else:
sha1 = None
if len(values) > 6:
uniquifier = values[6]
else:
uniquifier = None
if rev != currentRev:
contentType = None
else:
savedOffset = f.tell()
f.seek(offset)
contentType = f.read(5)
f.seek(savedOffset)
self.text = TextRep(rev, offset, length, size, digest,
contentType, currentRev, self, sha1, uniquifier)
elif field == 'props':
(rev, offset, length, size, digest) = value.split(' ')
rev = int(rev)
offset = int(offset)
length = int(length)
size = int(size)
if rev != currentRev:
contentType = None
else:
savedOffset = f.tell()
f.seek(offset)
contentType = f.read(5)
f.seek(savedOffset)
self.props = PropRep(rev, offset, length, size, digest,
contentType, currentRev, self)
elif field == 'cpath':
self.cpath = value
elif field == 'copyroot':
self.copyroot = value
elif field == 'copyfrom':
self.copyfrom = value
elif field == 'count' or field == 'minfo-cnt' or field == 'minfo-here':
pass
else:
raise Exception("Unrecognized field '%s'\n" % field)
if self.type.type == 'dir':
if self.text:
if self.id.rev == self.text.rev:
offset = f.tell()
f.seek(self.text.offset)
self.dir = getDirHash(f)
for k,v in self.dir.items():
nodeType, nodeId = v
if nodeId.rev != self.id.rev:
if not os.path.exists(str(nodeId.rev)):
print("Can't check %s" % repr(nodeId))
continue
tmp = open(str(nodeId.rev),'rb')
tmp.seek(nodeId.offset)
idLine = tmp.readline()
tmp.close()
else:
f.seek(nodeId.offset)
idLine = f.readline()
if idLine != ("id: %s\n" % nodeId):
raise DataCorrupt(
("Entry for '%s' at " % k ) +
("offset %d is pointing to an " % self.text.offset) +
("invalid location (node claims to be at offset %d)" % (
nodeId.offset))
)
f.seek(offset)
else:
# The directory entries are stored in another file.
print("Warning: dir entries are stored in rev %d for noderev %s" % (
self.text.rev, repr(self.id)))
def __repr__(self):
str = 'NodeRev Id: %s\n type: %s\n' % (repr(self.id), repr(self.type))
if self.pred:
str = str + ' pred: %s\n' % repr(self.pred)
if self.text:
str = str + ' %s\n' % repr(self.text)
if self.props:
str = str + ' %s\n' % repr(self.props)
if self.cpath:
str = str + ' cpath: %s\n' % self.cpath
if self.copyroot:
str = str + ' copyroot: %s\n' % self.copyroot
if self.copyfrom:
str = str + ' copyfrom: %s\n' % self.copyfrom
if self.dir:
str = str + ' dir contents:\n'
for k in self.dir:
str = str + ' %s: %s\n' % (k, self.dir[k])
return str[:-1]
class ChangedPaths(object):
def __init__(self, f):
self.changedPaths = {}
while True:
      currentOffset = f.tell()
      action = f.readline()
if action == '\n' or action == '':
break
path = action[:-1]
try:
(id, action, textMod, propMod) = action[:-1].split(' ')[:4]
except:
raise DataCorrupt, \
"Data appears to be corrupt at offset %d" % currentOffset
path = path[len(' '.join([id, action, textMod, propMod]))+1:]
      line = f.readline()
if line != '\n':
(copyfromRev, copyfromPath) = line[:-1].split(' ', 1)
else:
copyfromRev = -1
copyfromPath = ''
self.changedPaths[path] = (id, action, textMod, propMod,
copyfromRev, copyfromPath)
def __iter__(self):
return self.changedPaths.iteritems()
def getRootAndChangedPaths(revFile):
offset = -2
while True:
revFile.seek(offset, 2)
c = revFile.read(1)
if c == '\n':
offset = revFile.tell()
break
offset = offset - 1
(rootNode, changedPaths) = map(int, revFile.readline().split(' '))
return (rootNode, changedPaths)
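# getRootAndChangedPaths() walks backwards from the end of the rev file to
# the final newline-terminated line, which holds two decimal offsets,
# e.g. "123 456\n": the offset of the root node-rev and the offset of the
# changed-path data.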
def dumpChangedPaths(changedPaths):
print("Changed Path Information:")
for (path,
(id, action, textMod, propMod,
copyfromRev, copyfromPath)) in changedPaths:
print(" %s:" % path)
print(" id: %s" % id)
print(" action: %s" % action)
print(" text mod: %s" % textMod)
print(" prop mod: %s" % propMod)
if copyfromRev != -1:
print(" copyfrom path: %s" % copyfromPath)
print(" copyfrom rev: %s" % copyfromRev)
print
class WalkStrategy(object):
def __init__(self, filename, rootOffset, currentRev):
self.f = open(filename, 'rb')
self.rootOffset = rootOffset
self.f.seek(rootOffset)
self.currentRev = currentRev
def _nodeWalker(self):
raise NotImplementedError, "_nodeWalker is not implemented"
def __iter__(self):
self.f.seek(self.rootOffset)
return self._nodeWalker()
class ClassicStrategy(WalkStrategy):
def _nodeWalker (self):
noderev = NodeRev(self.f, self.currentRev)
yield noderev
if noderev.type.type == 'dir':
for e in noderev.dir:
if noderev.dir[e][1].rev == noderev.id.rev:
self.f.seek(noderev.dir[e][1].offset)
for x in self._nodeWalker():
yield x
class RegexpStrategy(WalkStrategy):
def __init__(self, filename, rootOffset, currentRev):
WalkStrategy.__init__(self, filename, rootOffset, currentRev)
# File object passed to the NodeRev() constructor so that it
# doesn't interfere with our regex search.
self.nodeFile = open(filename, 'rb')
def _nodeWalker(self):
nodeId_re = re.compile(r'^id: [a-z0-9\./\-]+$')
self.f.seek(0)
offset = 0
for line in self.f:
match = nodeId_re.search(line)
if match:
self.nodeFile.seek(offset)
noderev = NodeRev(self.nodeFile, self.currentRev)
yield noderev
offset = offset + len(line)
def verify(noderev, revFile, dumpInstructions, dumpWindows):
print(noderev)
if noderev.text:
noderev.text.verify(revFile,
dumpInstructions,
dumpWindows)
if noderev.props and noderev.props.rev == noderev.props.currentRev:
noderev.props.verify(revFile,
dumpInstructions,
dumpWindows)
print
def truncate(noderev, revFile):
txnId = noderev.id
print("Truncating node %s (%s)" % (txnId, noderev.cpath))
# Grab the text rep
textRep = noderev.text
# Fix the text rep contents
offset = textRep.offset
revFile.seek(offset, 0)
revFile.write("PLAIN\x0aENDREP\x0a")
# Fix the node rev
offset = noderev.nodeOffset
revFile.seek(offset, 0)
while True:
savedOffset = revFile.tell()
s = revFile.readline()
if s[:4] == 'text':
revFile.seek(savedOffset, 0)
break
line = revFile.readline()
revFile.seek(savedOffset, 0)
fields = line.split(' ')
overallLength = len(line)
fields[3] = '0' * len(fields[3])
fields[4] = '0' * len(fields[4])
fields[5] = 'd41d8cd98f00b204e9800998ecf8427e'
if len(fields) > 6:
fields[6] = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
fields[7] = fields[7].strip()
newTextRep = ' '.join(fields) + '\x0a'
assert(len(newTextRep) == overallLength)
revFile.write(newTextRep)
print("Done.")
sys.exit(0)
def fixHeader(e, revFile):
'''Attempt to fix the rep header. e is expected to be of type
InvalidRepHeader, since the exception stores the necessary information
to help repair the file.'''
# First, we need to locate the real start of the text rep
textrep_re = re.compile(r'^(DELTA( \d+ \d+ \d+)?|PLAIN)$')
revFile.seek(0)
offset = 0
originalOffset = 0
for line in revFile:
m = textrep_re.match(line)
if m:
if offset >= originalOffset and offset < e.offset:
originalOffset = offset
headerLen = len(line)
offset = offset + len(line)
print("Original text rep located at", originalOffset)
# Okay, now we have the original offset of the text rep that was
# in the process of being written out. The header portion of the
# text rep has a fsync() done after it, so the 4K blocks actually
# start after the header. We need to make sure to copy the header
# and the next 4K, to be on the safe side.
copyLen = 4096 + headerLen
revFile.seek(originalOffset)
block = revFile.read(copyLen)
print("Copy %d bytes from offset %d" % (copyLen, originalOffset))
print("Write %d bytes at offset %d" % (copyLen, e.offset))
revFile.seek(e.offset)
revFile.write(block)
revFile.flush()
print("Fixed? :-) Re-run fsfsverify without the -f option")
def fixStream(e, revFile):
startOffset = e.svndiffStart
errorOffset = e.windowOffset
repeatedBlockOffset = errorOffset - ((errorOffset - startOffset) % 4096)
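  # As a sketch of the arithmetic above: if the svndiff stream starts at
  # offset 10000 and the corrupt window is reported at offset 18500, then
  # (18500 - 10000) % 4096 == 308, so repeatedBlockOffset is
  # 18500 - 308 == 18192, i.e. the start of the 4K block (measured from the
  # beginning of the stream) in which the error was found.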
# Now we need to move up the rest of the rep
# Determine the final offset by finding the end of the rep.
revFile.seek(errorOffset)
endrep_re = re.compile(".*ENDREP$")
srcLength = 0
for l in revFile:
srcLength += len(l)
m = endrep_re.match(l)
if m:
break
if not m:
raise Exception("Couldn't find end of rep!")
finalOffset = errorOffset + srcLength
srcOffset = errorOffset
destOffset = repeatedBlockOffset
print("Copy %d bytes from offset %d" % (srcLength, srcOffset))
print("Write %d bytes at offset %d" % (srcLength, destOffset))
while srcOffset < finalOffset:
blen = 64*1024
if (finalOffset - srcOffset) < blen:
blen = finalOffset - srcOffset
revFile.seek(srcOffset)
block = revFile.read(blen)
revFile.seek(destOffset)
revFile.write(block)
srcOffset += blen
destOffset += blen
revFile.flush()
revFile.close()
print("Fixed? :-) Re-run fsfsverify without the -f option")
def checkOptions(options):
count = 0
for k,v in options.__dict__.items():
if v and (k in ['dumpChanged', 'truncate', 'fixRlle']):
count = count + 1
if count > 1:
sys.stderr.write("Please use only one of -c, -f, and -t.\n")
sys.exit(1)
if options.dumpChanged and (options.dumpWindows or options.dumpInstructions):
sys.stderr.write(\
"-c is incompatible with -w and -i. Dropping -w and/or -i.\n")
if options.noVerify and (options.dumpWindows or options.dumpInstructions):
sys.stderr.write(\
"--no-verify is incompatible with -w and -i. Dropping -w and/or -i.\n")
def handleError(error, withTraceback=False):
print
if withTraceback:
import traceback
traceback.print_exc()
sys.stderr.write("Error %s: %s\n" % (error.__class__.__name__, str(e)))
sys.stderr.write("Try running with -f to fix the revision\n")
sys.exit(1)
if __name__ == '__main__':
from optparse import OptionParser
parser = OptionParser("usage: %prog [OPTIONS] REV-FILE")
parser.add_option("-c", "--changed-paths",
action="store_true", dest="dumpChanged",
help="Dump changed path information", default=False)
parser.add_option("", "--no-verify",
action="store_true", dest="noVerify",
help="Don't parse svndiff streams.", default=False)
parser.add_option("-i", "--instructions",
action="store_true", dest="dumpInstructions",
help="Dump instructions (implies -w)", default=False)
parser.add_option("-w", "--windows",
action="store_true", dest="dumpWindows",
help="Dump windows", default=False)
parser.add_option("-n", "--noderev-regexp",
action="store_true", dest="noderevRegexp",
help="Find all noderevs using a regexp", default=False)
parser.add_option("-f", "--fix-read-length-line-error",
action="store_true", dest="fixRlle",
help="Attempt to fix the read length line error",
default=False)
parser.add_option("-t", "--truncate",
action="store", type="string", dest="truncate",
help="Truncate the specified node rev.",
default=None)
parser.add_option("", "--traceback",
action="store_true", dest="showTraceback",
help="Show error tracebacks (mainly used for debugging).",
default=False)
(options, args) = parser.parse_args()
if len(args) != 1:
sys.stderr.write("Please specify exactly one rev file.\n")
parser.print_help()
sys.exit(1)
checkOptions(options)
filename = args[0]
if options.dumpInstructions:
options.dumpWindows = True
LOG_MASK |= LOG_INSTRUCTIONS
if options.dumpWindows:
LOG_MASK |= LOG_WINDOWS
if options.truncate or options.fixRlle:
revFile = open(filename, 'r+b')
else:
revFile = open(filename, 'rb')
(root, changed) = getRootAndChangedPaths(revFile)
if options.dumpChanged:
revFile.seek(changed)
changedPaths = ChangedPaths(revFile)
dumpChangedPaths(changedPaths)
sys.exit(0)
try:
match = re.match('([0-9]+)', os.path.basename(filename))
currentRev = int(match.group(1), 10)
except:
raise CmdlineError(
"The file name must start with a decimal " +
"number that indicates the revision")
if options.noderevRegexp:
strategy = RegexpStrategy(filename, root, currentRev)
else:
strategy = ClassicStrategy(filename, root, currentRev)
# Make stderr the same as stdout. This helps when trying to catch all of the
# output from a run.
sys.stderr = sys.stdout
try:
for noderev in strategy:
try:
if options.truncate:
# Check to see if this is the rev we need to truncate
if options.truncate == noderev.id:
truncate(noderev, revFile)
else:
print(noderev)
if not options.noVerify:
if noderev.text:
noderev.text.verify(revFile,
options.dumpInstructions,
options.dumpWindows)
if noderev.props and noderev.props.rev == noderev.props.currentRev:
noderev.props.verify(revFile,
options.dumpInstructions,
options.dumpWindows)
print
except:
sys.stdout.flush()
raise
except InvalidRepHeader as e:
if not options.fixRlle:
handleError(e, options.showTraceback)
fixHeader(e, revFile)
except PotentiallyFixableException as e:
if not options.fixRlle:
handleError(e, options.showTraceback)
fixStream(e, revFile)