| #!/usr/bin/env python |
| |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| """ |
| A script that takes a .svn/pristine/ hierarchy, with its existing |
| .svn/wc.db database, and populates the database's PRISTINE table |
| accordingly. (Use 'svn cleanup' to remove unreferenced pristines.) |
| |
| Usage: |
| |
| %s /path/to/wc [...] |
| """ |
| |
| # TODO: resolve the NotImplemented() in __main__ |
| |
| # TODO: increment refcount upon collision |
| # TODO: add <given file>, not just argv[1]/.svn/pristine/??/* |
| |
| import hashlib |
| import os |
| import re |
| import sqlite3 |
| import sys |
| |
| # ### This could require any other format that has the same PRISTINE schema |
| # ### and semantics. |
| FORMAT = 22 |
| BUFFER_SIZE = 4 * 1024 |
| |
| class UnknownFormat(Exception): |
| def __init__(self, formatno): |
| self.formatno = formatno |
| |
| def open_db(wc_path): |
| wc_db = os.path.join(wc_path, '.svn', 'wc.db') |
| conn = sqlite3.connect(wc_db) |
| curs = conn.cursor() |
| curs.execute('pragma user_version;') |
| formatno = int(curs.fetchone()[0]) |
| if formatno > FORMAT: |
| raise UnknownFormat(formatno) |
| return conn |
| |
| _sha1_re = re.compile(r'^[0-9a-f]{40}$') |
| |
| def md5_of(path): |
| fd = os.open(path, os.O_RDONLY) |
| ctx = hashlib.md5() |
| while True: |
| s = os.read(fd, BUFFER_SIZE) |
| if len(s): |
| ctx.update(s) |
| else: |
| os.close(fd) |
| return ctx.hexdigest() |
| |
| INSERT_QUERY = """ |
| INSERT OR REPLACE |
| INTO pristine(checksum,compression,size,refcount,md5_checksum) |
| VALUES (?,?,?,?,?) |
| """ |
| |
| def populate(wc_path): |
| conn = open_db(wc_path) |
| sys.stdout.write("Updating '%s': " % wc_path) |
| for dirname, dirs, files in os.walk(os.path.join(wc_path, '.svn/pristine/')): |
| # skip everything but .svn/pristine/xx/ |
| if os.path.basename(os.path.dirname(dirname)) == 'pristine': |
| sys.stdout.write("'%s', " % os.path.basename(dirname)) |
| for f in filter(lambda x: _sha1_re.match(x), files): |
| fullpath = os.path.join(dirname, f) |
| conn.execute(INSERT_QUERY, |
| ('$sha1$'+f, None, os.stat(fullpath).st_size, 1, |
| '$md5 $'+md5_of(fullpath))) |
| # periodic transaction commits, for efficiency |
| conn.commit() |
| else: |
| sys.stdout.write(".\n") |
| |
| if __name__ == '__main__': |
| raise NotImplemented("""Subversion does not know yet to avoid fetching |
| a file when a file with matching sha1 appears in the PRISTINE table.""") |
| |
| paths = sys.argv[1:] |
| if not paths: |
| paths = ['.'] |
| for wc_path in paths: |
| try: |
| populate(wc_path) |
| except UnknownFormat as e: |
| sys.stderr.write("Don't know how to handle '%s' (format %d)'\n" |
| % (wc_path, e.formatno)) |