| #!/usr/bin/env python |
| # -*- coding: utf-8 -*- |
| # |
| # hot-backup.py: perform a "hot" backup of a Subversion repository |
| # and clean any old Berkeley DB logfiles after the |
| # backup completes, if the repository backend is |
| # Berkeley DB. |
| # |
| # Subversion is a tool for revision control. |
| # See http://subversion.apache.org for more information. |
| # |
| # ==================================================================== |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # ==================================================================== |
| |
| # $HeadURL$ |
| # $LastChangedDate$ |
| # $LastChangedBy$ |
| # $LastChangedRevision$ |
| |
| ###################################################################### |
| |
| import sys, os, getopt, stat, re, time, shutil, subprocess |
| import functools |
| |
| ###################################################################### |
| # Global Settings |
| |
# Location of the svnlook executable
svnlook = r"@SVN_BINDIR@/svnlook"

# Location of the svnadmin executable
svnadmin = r"@SVN_BINDIR@/svnadmin"

# How many backups to retain by default (0 means "keep them all").
# The SVN_HOTBACKUP_BACKUPS_NUMBER environment variable overrides the
# built-in default of 64.
num_backups = int(os.getenv("SVN_HOTBACKUP_BACKUPS_NUMBER", 64))

# Supported archive types, each mapped to the filename extension it produces
archive_map = dict(
    gz=".tar.gz",
    bz2=".tar.bz2",
    zip=".zip",
    zip64=".zip",
)
| |
# Recursively adjust permission bits below a directory
def chmod_tree(path, mode, mask):
    """Change the permissions of everything beneath PATH.

    For every directory and file found while walking PATH (PATH itself is
    not touched), the bits in MASK are cleared from the entry's current
    mode and the bits in MODE are then set.  Symbolic links are skipped.
    """
    for parent, subdirs, filenames in os.walk(path):
        entries = [os.path.join(parent, name) for name in subdirs + filenames]
        for entry in entries:
            if os.path.islink(entry):
                continue
            current = os.stat(entry).st_mode
            os.chmod(entry, (current & ~mask) | mode)
| |
# For clearing away read-only directories
def safe_rmtree(dirname, retry=0):
    """Remove the tree at DIRNAME, making its contents writable first.

    Does nothing if DIRNAME does not exist.  If RETRY is true, a failed
    removal is retried after delays of 0.5, 1, 2 and 4 seconds; one last
    attempt is then made whose error, if any, propagates to the caller.
    If RETRY is false, a single attempt is made and any error propagates.
    """
    def rmtree(dirname):
        chmod_tree(dirname, 0o666, 0o666)
        shutil.rmtree(dirname)

    if not os.path.exists(dirname):
        return

    if retry:
        for delay in (0.5, 1, 2, 4):
            try:
                rmtree(dirname)
                return
            except OSError:
                # Only filesystem errors are worth retrying.  A bare
                # "except:" here would also swallow KeyboardInterrupt and
                # SystemExit, making the script ignore Ctrl-C for several
                # seconds.
                time.sleep(delay)

    # Single attempt (no retry requested) or final attempt after the
    # retries above; failures are deliberately not caught.
    rmtree(dirname)
| |
| ###################################################################### |
| # Command line arguments |
| |
def usage(out = sys.stdout):
    """Write this script's command-line help text to OUT.

    The name the script was invoked under (the basename of sys.argv[0])
    is substituted into the first line of the message.
    """
    help_template = """USAGE: %s [OPTIONS] REPOS_PATH BACKUP_PATH

Create a backup of the repository at REPOS_PATH in a subdirectory of
the BACKUP_PATH location, named after the youngest revision.

Options:
--archive-type=FMT Create an archive of the backup. FMT can be one of:
bz2 : Creates a bzip2 compressed tar file.
gz : Creates a gzip compressed tar file.
zip : Creates a compressed zip file.
zip64: Creates a zip64 file (can be > 2GB).
--num-backups=N Number of prior backups to keep around (0 to keep all).
--verify Verify the backup.
--help -h Print this help message and exit.

"""
    invoked_as = os.path.basename(sys.argv[0])
    out.write(help_template % (invoked_as,))
| |
| |
# Parse the command line.  On any usage error a message and the help text
# are written to stderr and the script exits with status 2.
try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "h?", ["archive-type=",
                                                        "num-backups=",
                                                        "verify",
                                                        "help"])
except getopt.GetoptError as e:
    sys.stderr.write("ERROR: %s\n\n" % e)
    sys.stderr.flush()
    usage(sys.stderr)
    sys.exit(2)

archive_type = None
verify_copy = False

for o, a in opts:
    if o == "--archive-type":
        archive_type = a
    elif o == "--num-backups":
        # Report a non-numeric value as a usage error instead of letting
        # the ValueError escape as a traceback.
        try:
            num_backups = int(a)
        except ValueError:
            sys.stderr.write("ERROR: Bad --num-backups '%s': "
                             "an integer is required\n\n" % a)
            sys.stderr.flush()
            usage(sys.stderr)
            sys.exit(2)
    elif o == "--verify":
        verify_copy = True
    elif o in ("-h", "--help", "-?"):
        usage()
        sys.exit()

# archive_map is the single source of truth for the supported formats, so
# validate against its keys rather than a separately maintained list.
if archive_type is not None and archive_type not in archive_map:
    sys.stderr.write("ERROR: Bad --archive-type\n")
    usage(sys.stderr)
    sys.exit(2)

if len(args) != 2:
    sys.stderr.write("ERROR: only two arguments allowed.\n\n")
    sys.stderr.flush()
    usage(sys.stderr)
    sys.exit(2)

# Path to repository
repo_dir = args[0]
repo = os.path.basename(os.path.abspath(repo_dir))

# Where to store the repository backup.  The backup will be placed in
# a *subdirectory* of this location, named after the youngest
# revision.
backup_dir = args[1]

# Added to the filename regexp, set when using --archive-type.
ext_re = ""

# When an archive is requested, backup names may optionally carry the
# archive extension.  archive_type has already been validated above, so
# the lookup cannot fail.
if archive_type:
    ext_re = "(" + re.escape(archive_map[archive_type]) + ")?"
| |
| |
| ###################################################################### |
| # Helper functions |
| |
def _backup_sort_key(name):
    """Return the (revision, increment) pair parsed from backup name NAME.

    NAME must end in "-REV" or "-REV-INC", optionally followed by the
    archive extension described by the module-level EXT_RE.  A name
    without an increment gets increment -1 so that it sorts before every
    numbered increment of the same revision.  Raises ValueError if NAME
    does not have that form.
    """
    regexp = re.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?" +
                        ext_re + "$")
    match = regexp.search(name)
    if match is None:
        raise ValueError("'%s' is not a recognized backup name" % name)
    increment = match.group('increment')
    return (int(match.group('revision')),
            -1 if increment is None else int(increment))

def comparator(a, b):
    """Compare backup names A and B, cmp()-style, oldest first.

    Names are ordered by revision number, then by increment (a name with
    no increment precedes any increment).  Returns -1, 0 or 1.  Distinct
    names can compare equal -- e.g. a backup directory and the archive made
    from it when an archive extension is in effect -- so 0 is a real
    result; returning it keeps the ordering consistent for sorting.
    """
    key_a = _backup_sort_key(a)
    key_b = _backup_sort_key(b)
    return (key_a > key_b) - (key_a < key_b)
| |
def get_youngest_revision():
    """Examine the repository REPO_DIR using the svnlook binary
    specified by SVNLOOK, and return the youngest revision.

    The revision is returned as a string.  Raises Exception if svnlook
    writes anything to stderr, exits with a non-zero status, or produces
    no output."""

    p = subprocess.Popen([svnlook, 'youngest', '--', repo_dir],
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    # communicate() closes stdin, drains stdout and stderr together (so a
    # full stderr pipe cannot stall the child while we wait on stdout) and
    # waits for the process to exit.
    stdout_data, stderr_data = p.communicate()

    # Decode so the error message shows text rather than a bytes repr.
    stderr_lines = stderr_data.decode('utf-8', 'replace').splitlines()
    if stderr_lines:
        raise Exception("Unable to find the youngest revision for repository '%s'"
                        ": %s" % (repo_dir, stderr_lines[0].rstrip()))

    stdout_lines = stdout_data.splitlines()
    if p.returncode != 0 or not stdout_lines:
        raise Exception("Unable to find the youngest revision for repository '%s'"
                        ": svnlook exited with status %s and reported no revision"
                        % (repo_dir, p.returncode))

    return stdout_lines[0].strip().decode()
| |
| ###################################################################### |
| # Main |
| |
print("Beginning hot backup of '"+ repo_dir + "'.")


### Step 1: get the youngest revision.

try:
    youngest = get_youngest_revision()
except Exception as e:
    sys.stderr.write("%s\n" % e)
    sys.stderr.flush()
    sys.exit(1)

print("Youngest revision is %s" % youngest)


### Step 2: Find next available backup path

backup_subdir = os.path.join(backup_dir, repo + "-" + youngest)

# If there is already a backup of this revision, then append the
# next highest increment to the path.  We still need to do a backup
# because the repository might have changed despite no new revision
# having been created.  We find the highest increment and add one
# rather than start from 1 and increment because the starting
# increments may have already been removed due to num_backups.

regexp = re.compile("^" + re.escape(repo) + "-" + re.escape(youngest) +
                    "(-(?P<increment>[0-9]+))?" + ext_re + "$")
directory_list = os.listdir(backup_dir)
young_list = [x for x in directory_list if regexp.search(x)]
if young_list:
    young_list.sort(key = functools.cmp_to_key(comparator))
    increment = regexp.search(young_list.pop()).groupdict()['increment']
    if increment:
        backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-"
                                     + str(int(increment) + 1))
    else:
        backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-1")

### Step 3: Ask subversion to make a hot copy of the repository.

print("Backing up repository to '" + backup_subdir + "'...")
err_code = subprocess.call([svnadmin, "hotcopy", "--clean-logs",
                            '--', repo_dir, backup_subdir])
if err_code != 0:
    sys.stderr.write("Unable to backup the repository.\n")
    sys.stderr.flush()
    sys.exit(err_code)
else:
    print("Done.")

### Step 4: Verify the hotcopy
if verify_copy:
    print("Verifying backup...")
    err_code = subprocess.call([svnadmin, "verify", "--quiet", '--', backup_subdir])
    if err_code != 0:
        sys.stderr.write("Backup verification failed.\n")
        sys.stderr.flush()
        sys.exit(err_code)
    else:
        print("Done.")

### Step 5: Make an archive of the backup if required.
if archive_type:
    archive_path = backup_subdir + archive_map[archive_type]
    err_msg = ""

    # err_code is 0 here (the earlier steps exit on failure); the handlers
    # below set it to a negative value when archiving fails.
    print("Archiving backup to '" + archive_path + "'...")
    if archive_type == 'gz' or archive_type == 'bz2':
        try:
            import tarfile
            # The with-statement closes the archive even if adding fails.
            with tarfile.open(archive_path, 'w:' + archive_type) as tar:
                tar.add(backup_subdir, os.path.basename(backup_subdir))
        except ImportError as e:
            err_msg = "Import failed: " + str(e)
            err_code = -2
        except (tarfile.TarError, IOError, OSError) as e:
            # I/O failures (e.g. a full disk) are reported like tar errors
            # rather than escaping as a traceback.
            err_msg = "Tar failed: " + str(e)
            err_code = -3

    elif archive_type == 'zip' or archive_type == 'zip64':
        try:
            import zipfile

            def add_to_zip(zp, root, dirname, names):
                """Add each regular file among NAMES (entries of DIRNAME)
                to the open zip ZP, stored under its path relative to ROOT.
                Symlinked directories are descended into explicitly,
                because os.walk() does not follow them."""
                # NOTE(review): only files are written, so empty
                # directories do not appear in the zip archive.
                root = os.path.join(root, '')

                for name in names:
                    path = os.path.join(dirname, name)
                    if os.path.isfile(path):
                        zp.write(path, path[len(root):])
                    elif os.path.isdir(path) and os.path.islink(path):
                        # Keep the same ROOT so these files stay under the
                        # backup's own directory inside the archive instead
                        # of landing at the archive's top level.
                        for dirpath, dirs, files in os.walk(path):
                            add_to_zip(zp, root, dirpath, dirs + files)

            # The with-statement closes the archive even if writing fails.
            with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED,
                                 archive_type == 'zip64') as zp:
                for dirpath, dirs, files in os.walk(backup_subdir):
                    add_to_zip(zp, backup_dir, dirpath, dirs + files)
        except ImportError as e:
            err_msg = "Import failed: " + str(e)
            err_code = -4
        except (zipfile.error, zipfile.LargeZipFile, IOError, OSError) as e:
            # LargeZipFile is raised when plain 'zip' would need zip64
            # extensions; it is not a subclass of zipfile.error.
            err_msg = "Zip failed: " + str(e)
            err_code = -5


    if err_code != 0:
        sys.stderr.write("Unable to create an archive for the backup.\n%s\n" % err_msg)
        sys.stderr.flush()
        sys.exit(err_code)
    else:
        print("Archive created, removing backup '" + backup_subdir + "'...")
        safe_rmtree(backup_subdir, 1)

### Step 6: finally, remove all repository backups other than the last
###         NUM_BACKUPS.

if num_backups > 0:
    regexp = re.compile("^" + re.escape(repo) + "-[0-9]+(-[0-9]+)?" + ext_re + "$")
    directory_list = os.listdir(backup_dir)
    old_list = [x for x in directory_list if regexp.search(x)]
    old_list.sort(key = functools.cmp_to_key(comparator))
    # Drop the newest NUM_BACKUPS entries from the list; what remains is
    # what gets deleted.
    del old_list[max(0,len(old_list)-num_backups):]
    for item in old_list:
        old_backup_item = os.path.join(backup_dir, item)
        print("Removing old backup: " + old_backup_item)
        if os.path.isdir(old_backup_item):
            safe_rmtree(old_backup_item, 1)
        else:
            os.remove(old_backup_item)