| #!/usr/bin/env python |
| # Copyright (c) 2006, 2007 by John Szakmeister <john at szakmeister dot net> |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program; if not, write to the Free Software |
| # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| |
| import os |
| import optparse |
| import sys |
| import re |
| |
| try: |
| import hashlib |
| md5_new = hashlib.md5 |
| except ImportError: |
| import md5 |
| md5_new = md5.new |
| |
| |
| # A handy constant for refering to the NULL digest (one that |
| # matches every digest). |
| NULL_DIGEST = '00000000000000000000000000000000' |
| |
| |
| class FsfsVerifyException(Exception): |
| pass |
| |
| |
| class PotentiallyFixableException(FsfsVerifyException): |
| '''Represents a class of problems that we may be able to fix.''' |
| |
| def __init__(self, message, offset): |
| FsfsVerifyException.__init__(self, message) |
| self.offset = offset |
| |
| |
| class InvalidInstruction(PotentiallyFixableException): |
| pass |
| |
| |
| class InvalidCompressedStream(PotentiallyFixableException): |
| pass |
| |
| |
| class InvalidRepHeader(PotentiallyFixableException): |
| pass |
| |
| |
| class InvalidWindow(PotentiallyFixableException): |
| pass |
| |
| |
| class InvalidSvndiffVersion(FsfsVerifyException): |
| pass |
| |
| |
| class InvalidSvndiffHeader(FsfsVerifyException): |
| pass |
| |
| |
| class DataCorrupt(FsfsVerifyException): |
| pass |
| |
| |
| class NoMoreData(FsfsVerifyException): |
| pass |
| |
| |
| class CmdlineError(FsfsVerifyException): |
| pass |
| |
| |
| LOG_INSTRUCTIONS = 1 |
| LOG_WINDOWS = 2 |
| LOG_SVNDIFF = 4 |
| |
| LOG_MASK = LOG_SVNDIFF |
| |
| |
| def log(type, indent, format, *args): |
| if type & LOG_MASK: |
| indentStr = ' ' * indent |
| str = format % args |
| str = '\n'.join([indentStr + x for x in str.split('\n')]) |
| print(str) |
| |
| |
| class ByteStream(object): |
| def __init__(self, fileobj): |
| self._f = fileobj |
| |
| def readByte(self): |
| return ord(self._f.read(1)) |
| |
| def tell(self): |
| return self._f.tell() |
| |
| def advance(self, numBytes): |
| self._f.seek(numBytes, 1) |
| |
| def clone(self): |
| if hasattr(self._f, 'clone'): |
| newFileObj = self._f.clone() |
| else: |
| # We expect the file object to map to a real file |
| # |
| # Tried using dup(), but (at least on the mac), that ends up |
| # creating 2 handles to the same underlying os file object, |
| # instead of two independent file objects. So, we resort to |
| # an open call to create a new file object |
| newFileObj = open(self._f.name, 'rb') |
| newFileObj.seek(self._f.tell()) |
| return ByteStream(newFileObj) |
| |
| # The following let ByteStream behave as a file within the |
| # context of this script. |
| |
| def read(self, *args, **kwargs): |
| return self._f.read(*args, **kwargs) |
| |
| def seek(self, *args, **kwargs): |
| return self._f.seek(*args, **kwargs) |
| |
| |
| class ZlibByteStream(ByteStream): |
| def __init__(self, fileobj, length): |
| self._f = fileobj |
| |
| # Store the number of bytes consumed thus far so we can compute an offset |
| self._numBytesConsumed = 0 |
| |
| self._startingOffset = self._f.tell() |
| |
| import zlib, binascii |
| self._z = zlib.decompressobj(15) |
| |
| self._buffer = self._z.decompress(self._f.read(length)) |
| self._origBufferLength = len(self._buffer) |
| |
| def readByte(self): |
| if not self._buffer: |
| raise NoMoreData, "Unexpected end of data stream!" |
| |
| byte = self._buffer[0] |
| self._buffer = self._buffer[1:] |
| |
| return ord(byte) |
| |
| def tell(self): |
| return self._origBufferLength - len(self._buffer) |
| |
| def advance(self, numBytes): |
| while numBytes: |
| self.readByte() |
| |
| def clone(self): |
| if hasattr(self._f, 'clone'): |
| newFileObj = self._f.clone() |
| else: |
| newFileObj = open(self._f.name, 'rb') |
| newFileObj.seek(self._f.tell()) |
| return ByteStream(newFileObj) |
| |
| # The following let ByteStream behave as a file within the |
| # context of this script. |
| |
| def read(self, *args, **kwargs): |
| raise |
| |
| def seek(self, *args, **kwargs): |
| raise |
| |
| |
| def getVarint(byteStream): |
| '''Grabs a variable sized int from a bitstream (meaning this function |
| doesn't seek).''' |
| |
| i = long(0) |
| while True: |
| byte = byteStream.readByte() |
| i = (i << 7) + (byte & 0x7F) |
| if byte & 0x80 == 0: |
| break |
| return i |
| |
| |
| INSTR_COPY_SOURCE = 'copy-source' |
| INSTR_COPY_TARGET = 'copy-target' |
| INSTR_COPY_DATA = 'copy-data' |
| |
| |
| class SvndiffInstruction(object): |
| def __init__(self, byteStream): |
| self.instrOffset = byteStream.tell() |
| |
| byte = byteStream.readByte() |
| |
| instruction = (byte >> 6) & 3 |
| length = byte & 0x3F |
| |
| if instruction == 3: |
| raise InvalidInstruction( |
| "Invalid instruction found at offset %d (%02X)" % (self.instrOffset, |
| byte), |
| self.instrOffset) |
| |
| if instruction == 0: |
| self.type = INSTR_COPY_SOURCE |
| elif instruction == 1: |
| self.type = INSTR_COPY_TARGET |
| else: |
| self.type = INSTR_COPY_DATA |
| |
| if length == 0: |
| # Length is coded as a varint following the current byte |
| length = getVarint(byteStream) |
| |
| |
| self.length = length |
| |
| if (self.type == INSTR_COPY_SOURCE) or (self.type == INSTR_COPY_TARGET): |
| self.offset = getVarint(byteStream) |
| |
| if self.type == INSTR_COPY_SOURCE: |
| self.sourceOffset = self.offset |
| else: |
| self.sourceOffset = 0 |
| |
| if self.type == INSTR_COPY_TARGET: |
| self.targetOffset = self.offset |
| else: |
| self.targetOffset = 0 |
| |
| # Determine the number of bytes consumed in the source stream, target |
| # stream, and the data stream |
| |
| if self.type == INSTR_COPY_SOURCE: |
| self.sourceLength = self.length |
| else: |
| self.sourceLength = 0 |
| |
| if self.type == INSTR_COPY_TARGET: |
| self.targetLength = self.length |
| else: |
| self.targetLength = 0 |
| |
| if self.type == INSTR_COPY_DATA: |
| self.dataLength = self.length |
| else: |
| self.dataLength = 0 |
| |
| self.instrLength = byteStream.tell() - self.instrOffset |
| |
| def __repr__(self): |
| return '<SvndiffInstruction %s so:%d sl:%d to: %d tl:%d dl:%d (%d, %d)>' % ( |
| self.type, self.sourceOffset, self.sourceLength, self.targetOffset, |
| self.targetLength, self.dataLength, self.instrOffset, self.instrLength) |
| |
| |
| class Window(object): |
| def __init__(self, byteStream, svndiffVersion): |
| if svndiffVersion not in [0, 1]: |
| raise InvalidSvndiffVersion, \ |
| "Invalid svndiff version %d" % svndiffVersion |
| |
| # Record the initial offset of the window |
| self.windowOffset = byteStream.tell() |
| |
| try: |
| self.sourceOffset = getVarint(byteStream) |
| self.sourceLength = getVarint(byteStream) |
| self.targetLength = getVarint(byteStream) |
| self.instrLength = getVarint(byteStream) |
| self.dataLength = getVarint(byteStream) |
| self.windowHeaderLength = byteStream.tell() - self.windowOffset |
| self.windowLength = \ |
| self.windowHeaderLength + self.instrLength + self.dataLength |
| |
| # Store the byte stream, and clone it for use as a data stream. |
| self.instrByteStream = byteStream |
| self.dataByteStream = byteStream.clone() |
| |
| # Advance the data stream past the instructions to the start of the data. |
| self.dataByteStream.advance(self.instrLength) |
| except: |
| e = InvalidWindow( |
| "The window header at offset %d appears to be corrupted" % \ |
| (self.windowOffset), |
| self.windowOffset) |
| e.windowOffset = self.windowOffset |
| raise e |
| |
| |
| # In svndiff1, the instruction area starts with a varint-encoded length. |
| # If this length matches the one encoded in the header, then there is no |
| # compression. If it differs, then the stream is compressed with zlib. |
| |
| self.origInstrStream = self.instrByteStream |
| self.origDataStream = self.dataByteStream |
| self.isInstrCompressed = False |
| self.isDataCompressed = False |
| self.compressedInstrLength = self.instrLength |
| self.compressedDataLength = self.dataLength |
| |
| if svndiffVersion == 1: |
| try: |
| offset = self.instrByteStream.tell() |
| encodedInstrLength = getVarint(self.instrByteStream) |
| instrIntSize = self.instrByteStream.tell() - offset |
| |
| offset = self.dataByteStream.tell() |
| encodedDataLength = getVarint(self.dataByteStream) |
| dataIntSize = self.dataByteStream.tell() - offset |
| |
| self.instrLength = encodedInstrLength |
| self.dataLength = encodedDataLength |
| except: |
| e = InvalidWindow( |
| "The window header at offset %d appears to be corrupted" % \ |
| (self.windowOffset), |
| self.windowOffset) |
| e.windowOffset = self.windowOffset |
| raise e |
| |
| # Now, we need to make a determination about whether the data and |
| # instructions are compressed. If they are, we need to zlib decompress |
| # them. We do that by creating another stream and that will decompress |
| # the data on the fly. |
| try: |
| offset = self.instrByteStream.tell() |
| if self.compressedInstrLength - instrIntSize != self.instrLength: |
| self.origInstrStream = self.instrByteStream |
| self.instrByteStream = ZlibByteStream(self.origInstrStream, |
| self.compressedInstrLength) |
| self.isInstrCompressed = True |
| except Exception as e: |
| new_e = InvalidCompressedStream( |
| "Invalid compressed instr stream at offset %d (%s)" % (offset, |
| str(e)), |
| offset) |
| new_e.windowOffset = self.windowOffset |
| raise new_e |
| |
| try: |
| offset = self.dataByteStream.tell() |
| if self.compressedDataLength - dataIntSize != self.dataLength: |
| self.origDataStream = self.dataByteStream |
| self.dataByteStream = ZlibByteStream(self.origDataStream, |
| self.compressedDataLength) |
| self.isDataCompressed = True |
| except Exception as e: |
| new_e = InvalidCompressedStream( |
| "Invalid compressed data stream at offset %d (%s, %s)\n" % ( |
| offset, str(e), repr(self)), |
| offset) |
| new_e.windowOffset = self.windowOffset |
| raise new_e |
| |
| def verify(self): |
| expectedInstrLength = self.instrLength |
| expectedDataLength = self.dataLength |
| expectedTargetLength = self.targetLength |
| expectedSourceLength = self.sourceLength |
| |
| computedInstrLength = 0 |
| computedDataLength = 0 |
| computedTargetLength = 0 |
| computedSourceLength = 0 |
| |
| if expectedInstrLength == 0: |
| e = InvalidWindow( |
| "Corrupt window (at offset %d) has 0 instructions?!" % self.windowOffset, |
| self.windowOffset) |
| e.windowOffset = self.windowOffset |
| raise e |
| |
| while computedInstrLength < expectedInstrLength: |
| try: |
| instr = SvndiffInstruction(self.instrByteStream) |
| except PotentiallyFixableException as e: |
| e.window = self |
| e.windowOffset = self.windowOffset |
| raise |
| |
| log(LOG_INSTRUCTIONS, 4, repr(instr)) |
| |
| computedInstrLength += instr.instrLength |
| computedDataLength += instr.dataLength |
| computedSourceLength += instr.sourceLength |
| computedTargetLength += \ |
| instr.targetLength + instr.sourceLength + instr.dataLength |
| |
| if computedInstrLength != expectedInstrLength: |
| e = InvalidWindow( |
| "The number of instruction bytes consumed (%d) doesn't match the expected number (%d)" % \ |
| (computedInstrLength, expectedInstrLength), |
| self.windowOffset) |
| e.windowOffset = self.windowOffset |
| raise e |
| |
| if computedDataLength != expectedDataLength: |
| e = InvalidWindow( |
| "The number of data bytes consumed (%d) doesn't match the expected number (%d)" % \ |
| (computedDataLength, expectedDataLength), |
| self.windowOffset) |
| e.windowOffset = self.windowOffset |
| raise e |
| |
| if computedTargetLength != expectedTargetLength: |
| e = InvalidWindow( |
| "The number of target bytes consumed (%d) doesn't match the expected number (%d)" % \ |
| (computedTargetLength, expectedTargetLength), |
| self.windowOffset) |
| e.windowOffset = self.windowOffset |
| raise e |
| |
| # It appears that the source length specified in the window, isn't exactly |
| # equal to what gets consumed. I suspect that's because the algorithm is using different |
| # offsets within the window, and one offset/length pair will reach the end of the window. |
| # However, this hasn't shown to be a clear indicator of corruption. So for now, I'm |
| # commenting it out. |
| # |
| #if computedSourceLength != expectedSourceLength: |
| # e = InvalidWindow( |
| # "The number of source bytes consumed (%d) doesn't match the expected number (%d)" % \ |
| # (computedSourceLength, expectedSourceLength), |
| # self.windowOffset) |
| # e.windowOffset = self.windowOffset |
| # raise e |
| |
| # Advance past the data. We do this using seek because we might have |
| # read a few bytes from the stream if it potentially had compressed data |
| self.origInstrStream.seek(self.windowOffset + self.windowLength) |
| |
| def __repr__(self): |
| if hasattr(self, 'compressedInstrLength'): |
| str = 'cil: %d cdl: %d ' % (self.compressedInstrLength, |
| self.compressedDataLength) |
| else: |
| str = '' |
| |
| return "<Window wo:%d so:%d sl:%d tl:%d %sil:%d dl:%d whl:%d wl:%d>" % ( |
| self.windowOffset, self.sourceOffset, self.sourceLength, |
| self.targetLength, str, self.instrLength, self.dataLength, |
| self.windowHeaderLength, self.windowLength) |
| |
| |
| class Svndiff(object): |
| def __init__(self, fileobj, length): |
| self._f = fileobj |
| self.startingOffset = self._f.tell() |
| |
| header = self._f.read(4) |
| if len(header) != 4: |
| raise EOFError, \ |
| "Unexpected end of file while svndiff header at offset %d)" % \ |
| (self._f.tell()) |
| |
| if header[0:3] != 'SVN': |
| raise InvalidSvndiffHeader, "Invalid svndiff header at offset %d" % \ |
| (self.startingOffset) |
| |
| self.version = ord(header[3]) |
| if self.version not in [0, 1]: |
| raise InvalidSvndiffVersion, "Invalid svndiff version %d" % self.version |
| |
| self._length = length - 4 |
| |
| def verify(self): |
| self._f.seek(self.startingOffset+4) |
| |
| bs = ByteStream(self._f) |
| |
| log(LOG_SVNDIFF, 2, "<Svndiff so: %d ver: %d>", self.startingOffset, |
| self.version) |
| |
| try: |
| remaining = self._length |
| while remaining > 0: |
| w = Window(bs, self.version) |
| log(LOG_WINDOWS, 3, repr(w)) |
| w.verify() |
| remaining -= w.windowLength |
| except PotentiallyFixableException as e: |
| e.svndiffStart = self.startingOffset |
| raise |
| |
| |
| def getDirHash(f): |
| l = f.readline() |
| if l != 'PLAIN\n': |
| raise ValueError, "Expected a PLAIN representation (%d)" % f.tell() |
| |
| hash = {} |
| |
| while True: |
| field = f.readline()[:-1] |
| if field == 'END': |
| break |
| assert(field[0] == 'K') |
| length = int(field.split(' ')[1]) |
| field = f.readline()[:length] |
| |
| value = f.readline()[:-1] |
| assert(value[0] == 'V') |
| length = int(value.split(' ')[1]) |
| value = f.readline()[:length] |
| |
| (type, txn) = value.split(' ') |
| hash[field] = [NodeType(type), NodeId(txn)] |
| |
| return hash |
| |
| |
| |
| class Rep(object): |
| def __init__(self, type, rev, offset, length, size, digest, |
| contentType, currentRev, noderev): |
| self.type = type |
| self.rev = int(rev) |
| self.offset = int(offset) |
| self.length = int(length) |
| self.size = int(size) |
| |
| self.digest = digest.strip() |
| self.currentRev = currentRev |
| |
| self.contentType = contentType |
| self.noderev = noderev |
| |
| def __repr__(self): |
| if not self.contentType: |
| contentType = 'UNKNOWN' |
| else: |
| if self.contentType not in ['PLAIN', 'DELTA', None]: |
| contentType = 'INVALID' |
| else: |
| contentType = self.contentType |
| return '%s: %s %d %d %d %d %s' % (self.type, contentType, self.rev, |
| self.offset, self.length, self.size, |
| self.digest) |
| |
| def verify(self, f, dumpInstructions, dumpWindows): |
| if self.contentType not in ['PLAIN', 'DELTA', None]: |
| e = InvalidRepHeader("Invalid rep header found at %d (%s)!" % \ |
| (self.offset, self.contentType), |
| self.offset) |
| e.rep = self |
| e.noderev = self.noderev |
| raise e |
| |
| if self.rev != currentRev: |
| sys.stderr.write("Skipping text rep since it isn't present in the current rev\n") |
| return |
| |
| f.seek(self.offset) |
| header = f.read(5) |
| if header != self.contentType: |
| raise FsfsVerifyException, \ |
| "Invalid rep header found at %d (%s, %s)!" % (self.offset, header, |
| self.contentType) |
| |
| if header == 'DELTA': |
| line = f.readline() |
| digest = None |
| |
| # This should be the start of the svndiff stream |
| actual_start = f.tell() |
| try: |
| svndiff = Svndiff(f, self.length) |
| svndiff.verify() |
| except Exception as e: |
| e.rep = self |
| e.noderev = self.noderev |
| raise |
| |
| if digest and (self.digest != NULL_DIGEST): |
| assert(digest == self.digest) |
| else: |
| if f.read(1) != '\n': |
| raise DataCorrupt, "Expected a '\\n' after PLAIN" |
| |
| m = md5_new() |
| m.update(f.read(self.length)) |
| |
| if self.digest and self.digest != NULL_DIGEST \ |
| and self.digest != m.hexdigest(): |
| raise DataCorrupt, \ |
| "PLAIN data is corrupted. Expected digest '%s', computed '%s'." % ( |
| self.digest, m.hexdigest()) |
| |
| buf = f.read(6) |
| if buf != 'ENDREP': |
| raise DataCorrupt, "Terminating ENDREP missing! %r, %r" % (buf, self) |
| pass |
| |
| |
| class TextRep(Rep): |
| def __init__(self, rev, offset, length, size, digest, |
| contentType, currentRev, noderev, sha1=None, uniquifier=None): |
| super(TextRep,self).__init__('text', rev, offset, length, size, |
| digest, contentType, currentRev, noderev) |
| self.sha1 = None |
| self.uniquifier = None |
| |
| |
| class PropRep(Rep): |
| def __init__(self, rev, offset, length, size, digest, |
| contentType, currentRev, noderev): |
| super(PropRep,self).__init__('prop', rev, offset, length, size, |
| digest, contentType, currentRev, noderev) |
| |
| |
| class NodeId(object): |
| def __init__(self, nodeid): |
| (self.txn_name, offset) = nodeid.split('/') |
| self.offset = int(offset) |
| self.rev = int(self.txn_name.split('.')[2][1:]) |
| |
| def __repr__(self): |
| return self.txn_name + '/%d' % self.offset |
| |
| def __eq__ (self, other): |
| s = self.txn_name + '/%d' % self.offset |
| if s == other: |
| return True |
| |
| return False |
| |
| |
| class NodeType(object): |
| def __init__(self, t): |
| if (t != 'file') and (t != 'dir'): |
| raise ValueError, 'Invalid Node type received: "%s"' % t |
| self.type = t |
| |
| def __repr__(self): |
| return self.type[:] |
| |
| |
| class NodeRev(object): |
| def __init__(self, f, currentRev): |
| self.pred = None |
| self.text = None |
| self.props = None |
| self.cpath = None |
| self.copyroot = None |
| self.copyfrom = None |
| self.dir = [] |
| |
| self.nodeOffset = f.tell() |
| |
| while True: |
| currentOffset = f.tell() |
| line = f.readline() |
| if line == '': |
| raise IOError, "Unexpected end of file at offset %d" % currentOffset |
| if line == '\n': |
| break |
| |
| # break apart the line |
| try: |
| (field, value) = line.split(':', 1) |
| except: |
| print("line: '%s'" % repr(line)) |
| print("Node revision offset: %i" % self.nodeOffset) |
| print("Current file position: %i" % f.tell()) |
| raise |
| |
| if field == "": |
| print("line: '%s'" % repr(line)) |
| print("Node revision offset: %i" % self.nodeOffset) |
| print("Current file position: %i" % f.tell()) |
| raise Exception("Empty field in node revision") |
| |
| # pull of the leading space and trailing new line |
| if len(value) < 2: |
| raise FsfsVerifyException("value needs to contain 2 or more bytes (%d)" % currentOffset) |
| value = value[1:-1] |
| |
| assert value != "" |
| |
| if field == 'id': |
| self.id = NodeId(value) |
| elif field == 'type': |
| self.type = NodeType(value) |
| elif field == 'pred': |
| self.pred = NodeId(value) |
| elif field == 'text': |
| values = value.split(' ') |
| rev = int(values[0]) |
| offset = int(values[1]) |
| length = int(values[2]) |
| size = int(values[3]) |
| digest = values[4] |
| |
| if len(values) > 5: |
| sha1 = values[5] |
| else: |
| sha1 = None |
| |
| if len(values) > 6: |
| uniquifier = values[6] |
| else: |
| uniquifier = None |
| |
| if rev != currentRev: |
| contentType = None |
| else: |
| savedOffset = f.tell() |
| f.seek(offset) |
| contentType = f.read(5) |
| f.seek(savedOffset) |
| |
| self.text = TextRep(rev, offset, length, size, digest, |
| contentType, currentRev, self, sha1, uniquifier) |
| elif field == 'props': |
| (rev, offset, length, size, digest) = value.split(' ') |
| rev = int(rev) |
| offset = int(offset) |
| length = int(length) |
| size = int(size) |
| |
| if rev != currentRev: |
| contentType = None |
| else: |
| savedOffset = f.tell() |
| f.seek(offset) |
| contentType = f.read(5) |
| f.seek(savedOffset) |
| |
| self.props = PropRep(rev, offset, length, size, digest, |
| contentType, currentRev, self) |
| elif field == 'cpath': |
| self.cpath = value |
| elif field == 'copyroot': |
| self.copyroot = value |
| elif field == 'copyfrom': |
| self.copyfrom = value |
| elif field == 'count' or field == 'minfo-cnt' or field == 'minfo-here': |
| pass |
| else: |
| raise Exception("Unrecognized field '%s'\n" % field) |
| |
| if self.type.type == 'dir': |
| if self.text: |
| if self.id.rev == self.text.rev: |
| offset = f.tell() |
| f.seek(self.text.offset) |
| self.dir = getDirHash(f) |
| |
| for k,v in self.dir.items(): |
| nodeType, nodeId = v |
| |
| if nodeId.rev != self.id.rev: |
| if not os.path.exists(str(nodeId.rev)): |
| print("Can't check %s" % repr(nodeId)) |
| continue |
| tmp = open(str(nodeId.rev),'rb') |
| tmp.seek(nodeId.offset) |
| idLine = tmp.readline() |
| tmp.close() |
| else: |
| f.seek(nodeId.offset) |
| idLine = f.readline() |
| |
| if idLine != ("id: %s\n" % nodeId): |
| raise DataCorrupt( |
| ("Entry for '%s' at " % k ) + |
| ("offset %d is pointing to an " % self.text.offset) + |
| ("invalid location (node claims to be at offset %d)" % ( |
| nodeId.offset)) |
| ) |
| f.seek(offset) |
| else: |
| # The directory entries are stored in another file. |
| print("Warning: dir entries are stored in rev %d for noderev %s" % ( |
| self.text.rev, repr(self.id))) |
| |
| def __repr__(self): |
| str = 'NodeRev Id: %s\n type: %s\n' % (repr(self.id), repr(self.type)) |
| if self.pred: |
| str = str + ' pred: %s\n' % repr(self.pred) |
| if self.text: |
| str = str + ' %s\n' % repr(self.text) |
| if self.props: |
| str = str + ' %s\n' % repr(self.props) |
| if self.cpath: |
| str = str + ' cpath: %s\n' % self.cpath |
| if self.copyroot: |
| str = str + ' copyroot: %s\n' % self.copyroot |
| if self.copyfrom: |
| str = str + ' copyfrom: %s\n' % self.copyfrom |
| if self.dir: |
| str = str + ' dir contents:\n' |
| for k in self.dir: |
| str = str + ' %s: %s\n' % (k, self.dir[k]) |
| return str[:-1] |
| |
| |
| class ChangedPaths(object): |
| def __init__(self, f): |
| self.changedPaths = {} |
| |
| while True: |
| currentOffset = revFile.tell() |
| action = revFile.readline() |
| if action == '\n' or action == '': |
| break |
| |
| path = action[:-1] |
| try: |
| (id, action, textMod, propMod) = action[:-1].split(' ')[:4] |
| except: |
| raise DataCorrupt, \ |
| "Data appears to be corrupt at offset %d" % currentOffset |
| path = path[len(' '.join([id, action, textMod, propMod]))+1:] |
| |
| line = revFile.readline() |
| if line != '\n': |
| (copyfromRev, copyfromPath) = line[:-1].split(' ', 1) |
| else: |
| copyfromRev = -1 |
| copyfromPath = '' |
| |
| self.changedPaths[path] = (id, action, textMod, propMod, |
| copyfromRev, copyfromPath) |
| |
| |
| def __iter__(self): |
| return self.changedPaths.iteritems() |
| |
| |
| def getRootAndChangedPaths(revFile): |
| offset = -2 |
| while True: |
| revFile.seek(offset, 2) |
| c = revFile.read(1) |
| if c == '\n': |
| offset = revFile.tell() |
| break |
| offset = offset - 1 |
| |
| (rootNode, changedPaths) = map(int, revFile.readline().split(' ')) |
| |
| return (rootNode, changedPaths) |
| |
| |
| def dumpChangedPaths(changedPaths): |
| print("Changed Path Information:") |
| for (path, |
| (id, action, textMod, propMod, |
| copyfromRev, copyfromPath)) in changedPaths: |
| print(" %s:" % path) |
| print(" id: %s" % id) |
| print(" action: %s" % action) |
| print(" text mod: %s" % textMod) |
| print(" prop mod: %s" % propMod) |
| if copyfromRev != -1: |
| print(" copyfrom path: %s" % copyfromPath) |
| print(" copyfrom rev: %s" % copyfromRev) |
| print |
| |
| |
| class WalkStrategy(object): |
| def __init__(self, filename, rootOffset, currentRev): |
| self.f = open(filename, 'rb') |
| self.rootOffset = rootOffset |
| self.f.seek(rootOffset) |
| self.currentRev = currentRev |
| |
| def _nodeWalker(self): |
| raise NotImplementedError, "_nodeWalker is not implemented" |
| |
| def __iter__(self): |
| self.f.seek(self.rootOffset) |
| return self._nodeWalker() |
| |
| |
| class ClassicStrategy(WalkStrategy): |
| def _nodeWalker (self): |
| noderev = NodeRev(self.f, self.currentRev) |
| yield noderev |
| |
| if noderev.type.type == 'dir': |
| for e in noderev.dir: |
| if noderev.dir[e][1].rev == noderev.id.rev: |
| self.f.seek(noderev.dir[e][1].offset) |
| for x in self._nodeWalker(): |
| yield x |
| |
| |
| class RegexpStrategy(WalkStrategy): |
| def __init__(self, filename, rootOffset, currentRev): |
| WalkStrategy.__init__(self, filename, rootOffset, currentRev) |
| |
| # File object passed to the NodeRev() constructor so that it |
| # doesn't interfere with our regex search. |
| self.nodeFile = open(filename, 'rb') |
| |
| def _nodeWalker(self): |
| nodeId_re = re.compile(r'^id: [a-z0-9\./\-]+$') |
| |
| self.f.seek(0) |
| offset = 0 |
| |
| for line in self.f: |
| match = nodeId_re.search(line) |
| if match: |
| self.nodeFile.seek(offset) |
| noderev = NodeRev(self.nodeFile, self.currentRev) |
| yield noderev |
| |
| offset = offset + len(line) |
| |
| |
| def verify(noderev, revFile, dumpInstructions, dumpWindows): |
| print(noderev) |
| |
| if noderev.text: |
| noderev.text.verify(revFile, |
| dumpInstructions, |
| dumpWindows) |
| |
| if noderev.props and noderev.props.rev == noderev.props.currentRev: |
| noderev.props.verify(revFile, |
| dumpInstructions, |
| dumpWindows) |
| |
| print |
| |
| |
| def truncate(noderev, revFile): |
| txnId = noderev.id |
| |
| print("Truncating node %s (%s)" % (txnId, noderev.cpath)) |
| |
| # Grab the text rep |
| textRep = noderev.text |
| |
| # Fix the text rep contents |
| offset = textRep.offset |
| revFile.seek(offset, 0) |
| revFile.write("PLAIN\x0aENDREP\x0a") |
| |
| # Fix the node rev |
| offset = noderev.nodeOffset |
| revFile.seek(offset, 0) |
| while True: |
| savedOffset = revFile.tell() |
| s = revFile.readline() |
| if s[:4] == 'text': |
| revFile.seek(savedOffset, 0) |
| break |
| |
| line = revFile.readline() |
| revFile.seek(savedOffset, 0) |
| fields = line.split(' ') |
| overallLength = len(line) |
| |
| fields[3] = '0' * len(fields[3]) |
| fields[4] = '0' * len(fields[4]) |
| fields[5] = 'd41d8cd98f00b204e9800998ecf8427e' |
| |
| if len(fields) > 6: |
| fields[6] = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' |
| fields[7] = fields[7].strip() |
| |
| newTextRep = ' '.join(fields) + '\x0a' |
| assert(len(newTextRep) == overallLength) |
| revFile.write(newTextRep) |
| print("Done.") |
| sys.exit(0) |
| |
| |
| def fixHeader(e, revFile): |
| '''Attempt to fix the rep header. e is expected to be of type |
| InvalidRepHeader, since the exception stores the necessary information |
| to help repair the file.''' |
| |
| # First, we need to locate the real start of the text rep |
| textrep_re = re.compile(r'^(DELTA( \d+ \d+ \d+)?|PLAIN)$') |
| |
| revFile.seek(0) |
| offset = 0 |
| originalOffset = 0 |
| for line in revFile: |
| m = textrep_re.match(line) |
| if m: |
| if offset >= originalOffset and offset < e.offset: |
| originalOffset = offset |
| headerLen = len(line) |
| offset = offset + len(line) |
| |
| print("Original text rep located at", originalOffset) |
| |
| # Okay, now we have the original offset of the text rep that was |
| # in the process of being written out. The header portion of the |
| # text rep has a fsync() done after it, so the 4K blocks actually |
| # start after the header. We need to make sure to copy the header |
| # and the next 4K, to be on the safe side. |
| copyLen = 4096 + headerLen |
| |
| revFile.seek(originalOffset) |
| block = revFile.read(copyLen) |
| print("Copy %d bytes from offset %d" % (copyLen, originalOffset)) |
| |
| print("Write %d bytes at offset %d" % (copyLen, e.offset)) |
| revFile.seek(e.offset) |
| revFile.write(block) |
| revFile.flush() |
| |
| print("Fixed? :-) Re-run fsfsverify without the -f option") |
| |
| |
| def fixStream(e, revFile): |
| startOffset = e.svndiffStart |
| errorOffset = e.windowOffset |
| |
| repeatedBlockOffset = errorOffset - ((errorOffset - startOffset) % 4096) |
| |
| # Now we need to move up the rest of the rep |
| |
| # Determine the final offset by finding the end of the rep. |
| revFile.seek(errorOffset) |
| |
| endrep_re = re.compile(".*ENDREP$") |
| srcLength = 0 |
| for l in revFile: |
| srcLength += len(l) |
| m = endrep_re.match(l) |
| if m: |
| break |
| |
| if not m: |
| raise Exception("Couldn't find end of rep!") |
| |
| finalOffset = errorOffset + srcLength |
| srcOffset = errorOffset |
| destOffset = repeatedBlockOffset |
| |
| print("Copy %d bytes from offset %d" % (srcLength, srcOffset)) |
| print("Write %d bytes at offset %d" % (srcLength, destOffset)) |
| |
| while srcOffset < finalOffset: |
| blen = 64*1024 |
| if (finalOffset - srcOffset) < blen: |
| blen = finalOffset - srcOffset |
| revFile.seek(srcOffset) |
| block = revFile.read(blen) |
| revFile.seek(destOffset) |
| revFile.write(block) |
| |
| srcOffset += blen |
| destOffset += blen |
| |
| revFile.flush() |
| revFile.close() |
| |
| print("Fixed? :-) Re-run fsfsverify without the -f option") |
| |
| |
| def checkOptions(options): |
| count = 0 |
| for k,v in options.__dict__.items(): |
| if v and (k in ['dumpChanged', 'truncate', 'fixRlle']): |
| count = count + 1 |
| |
| if count > 1: |
| sys.stderr.write("Please use only one of -c, -f, and -t.\n") |
| sys.exit(1) |
| |
| if options.dumpChanged and (options.dumpWindows or options.dumpInstructions): |
| sys.stderr.write(\ |
| "-c is incompatible with -w and -i. Dropping -w and/or -i.\n") |
| |
| if options.noVerify and (options.dumpWindows or options.dumpInstructions): |
| sys.stderr.write(\ |
| "--no-verify is incompatible with -w and -i. Dropping -w and/or -i.\n") |
| |
| |
| def handleError(error, withTraceback=False): |
| print |
| if withTraceback: |
| import traceback |
| traceback.print_exc() |
| |
| sys.stderr.write("Error %s: %s\n" % (error.__class__.__name__, str(e))) |
| sys.stderr.write("Try running with -f to fix the revision\n") |
| sys.exit(1) |
| |
| |
| if __name__ == '__main__': |
| from optparse import OptionParser |
| |
| parser = OptionParser("usage: %prog [OPTIONS] REV-FILE") |
| parser.add_option("-c", "--changed-paths", |
| action="store_true", dest="dumpChanged", |
| help="Dump changed path information", default=False) |
| parser.add_option("", "--no-verify", |
| action="store_true", dest="noVerify", |
| help="Don't parse svndiff streams.", default=False) |
| parser.add_option("-i", "--instructions", |
| action="store_true", dest="dumpInstructions", |
| help="Dump instructions (implies -w)", default=False) |
| parser.add_option("-w", "--windows", |
| action="store_true", dest="dumpWindows", |
| help="Dump windows", default=False) |
| parser.add_option("-n", "--noderev-regexp", |
| action="store_true", dest="noderevRegexp", |
| help="Find all noderevs using a regexp", default=False) |
| parser.add_option("-f", "--fix-read-length-line-error", |
| action="store_true", dest="fixRlle", |
| help="Attempt to fix the read length line error", |
| default=False) |
| parser.add_option("-t", "--truncate", |
| action="store", type="string", dest="truncate", |
| help="Truncate the specified node rev.", |
| default=None) |
| parser.add_option("", "--traceback", |
| action="store_true", dest="showTraceback", |
| help="Show error tracebacks (mainly used for debugging).", |
| default=False) |
| |
| (options, args) = parser.parse_args() |
| |
| if len(args) != 1: |
| sys.stderr.write("Please specify exactly one rev file.\n") |
| parser.print_help() |
| sys.exit(1) |
| |
| checkOptions(options) |
| |
| filename = args[0] |
| |
| if options.dumpInstructions: |
| options.dumpWindows = True |
| LOG_MASK |= LOG_INSTRUCTIONS |
| |
| if options.dumpWindows: |
| LOG_MASK |= LOG_WINDOWS |
| |
| if options.truncate or options.fixRlle: |
| revFile = open(filename, 'r+b') |
| else: |
| revFile = open(filename, 'rb') |
| |
| (root, changed) = getRootAndChangedPaths(revFile) |
| |
| if options.dumpChanged: |
| revFile.seek(changed) |
| changedPaths = ChangedPaths(revFile) |
| |
| dumpChangedPaths(changedPaths) |
| sys.exit(0) |
| |
| try: |
| import re |
| match = re.match('([0-9]+)', os.path.basename(filename)) |
| currentRev = int(match.group(1), 10) |
| except: |
| raise CmdlineError( |
| "The file name must start with a decimal " + |
| "number that indicates the revision") |
| |
| if options.noderevRegexp: |
| strategy = RegexpStrategy(filename, root, currentRev) |
| else: |
| strategy = ClassicStrategy(filename, root, currentRev) |
| |
| # Make stderr the same as stdout. This helps when trying to catch all of the |
| # output from a run. |
| sys.stderr = sys.stdout |
| |
| try: |
| for noderev in strategy: |
| try: |
| if options.truncate: |
| # Check to see if this is the rev we need to truncate |
| if options.truncate == noderev.id: |
| truncate(noderev, revFile) |
| |
| else: |
| print(noderev) |
| |
| if not options.noVerify: |
| if noderev.text: |
| noderev.text.verify(revFile, |
| options.dumpInstructions, |
| options.dumpWindows) |
| |
| if noderev.props and noderev.props.rev == noderev.props.currentRev: |
| noderev.props.verify(revFile, |
| options.dumpInstructions, |
| options.dumpWindows) |
| |
| print |
| except: |
| sys.stdout.flush() |
| raise |
| except InvalidRepHeader as e: |
| if not options.fixRlle: |
| handleError(e, options.showTraceback) |
| |
| fixHeader(e, revFile) |
| |
| except PotentiallyFixableException as e: |
| if not options.fixRlle: |
| handleError(e, options.showTraceback) |
| |
| fixStream(e, revFile) |
| |