blob: c9f234fb828a070b017ae16d5469d500289f7a2a [file] [log] [blame]
#!/usr/bin/env python
#
# hot-backup.py: perform a "hot" backup of a Berkeley DB repository.
# (and clean old logfiles after backup completes.)
#
# Subversion is a tool for revision control.
# See http://subversion.tigris.org for more information.
#
# ====================================================================
# Copyright (c) 2000-2003 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://subversion.tigris.org/.
# ====================================================================
######################################################################
import sys, os, shutil, string, re
######################################################################
# Global Settings

# Path to svnlook utility.  "@SVN_BINDIR@" is presumably a placeholder
# substituted with the real bin directory at build/install time --
# TODO confirm against the build system.
svnlook = "@SVN_BINDIR@/svnlook"
# Path to db_archive program (Berkeley DB log-archival utility)
db_archive = "/usr/local/BerkeleyDB.4.0/bin/db_archive"
# Path to db_recover program (Berkeley DB environment-recovery utility)
db_recover = "/usr/local/BerkeleyDB.4.0/bin/db_recover"
# Number of backups to keep around (0 for "keep them all")
num_backups = 64

######################################################################
# Command line arguments

# Exactly two arguments are required: the repository to back up, and
# the directory that will receive the backup subdirectories.
if len(sys.argv) != 3:
    print "Usage: ", os.path.basename(sys.argv[0]), " <repos_path> <backup_path>"
    sys.exit(1)

# Path to repository
repo_dir = sys.argv[1]
# Base name of the repository; used to build backup directory names
# like "<repo>-<youngest-rev>[-<increment>]".
repo = os.path.basename(os.path.abspath(repo_dir))

# Where to store the repository backup.  The backup will be placed in
# a *subdirectory* of this location, named after the youngest
# revision.
backup_dir = sys.argv[2]
######################################################################
# Helper functions
def comparator(a, b,
               _regexp=re.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?$")):
    """Compare two backup directory names, oldest first.

    Each name ends in "-REV" or "-REV-INCR" (e.g. "repo-17" or
    "repo-17-2").  Compares by revision number, then by increment; a
    name without an increment sorts before any incremented name for
    the same revision.  Returns -1 or 1 (cmp-style, for list.sort).
    The callers only pass distinct filenames, so equality never has to
    be reported.

    _regexp is a private default argument: it hoists the pattern
    compilation out of the per-call path (the original recompiled the
    regex on every comparison).  Callers must not supply it.
    """
    matcha = _regexp.search(a)
    matchb = _regexp.search(b)
    reva = int(matcha.group('revision'))
    revb = int(matchb.group('revision'))
    if reva < revb:
        return -1
    if reva > revb:
        return 1
    # Same revision: order by increment; a missing increment ("-REV"
    # with no "-INCR" suffix) is the oldest backup of that revision.
    inca = matcha.group('increment')
    incb = matchb.group('increment')
    if inca is None:
        return -1
    if incb is None:
        return 1
    if int(inca) < int(incb):
        return -1
    return 1
######################################################################
# Main

print "Beginning hot backup of '"+ repo_dir + "'."

### Step 1: get the youngest revision.
# Run "svnlook youngest <repo>" and capture its output.
infile, outfile, errfile = os.popen3(svnlook + " youngest " + repo_dir)
stdout_lines = outfile.readlines()
stderr_lines = errfile.readlines()
outfile.close()
infile.close()
errfile.close()

# NOTE(review): if svnlook fails, stdout_lines is empty and the next
# line raises IndexError; stderr_lines is collected but never shown.
youngest = string.strip(stdout_lines[0])
print "Youngest revision is", youngest

### Step 2: copy the whole repository structure.
backup_subdir = os.path.join(backup_dir, repo + "-" + youngest)

# If there is already a backup of this revision, then append the
# next highest increment to the path.  We still need to do a backup
# because the repository might have changed despite no new revision
# having been created.  We find the highest increment and add one
# rather than start from 1 and increment because the starting
# increments may have already been removed due to num_backups.
regexp = re.compile("^" + repo + "-" + youngest + "(-(?P<increment>[0-9]+))?$")
directory_list = os.listdir(backup_dir)
young_list = filter(lambda x: regexp.search(x), directory_list)
if young_list:
    # Sort oldest-first and pop the newest existing backup of this
    # revision; bump its increment (or start at -1 if it had none).
    young_list.sort(comparator)
    increment = regexp.search(young_list.pop()).groupdict()['increment']
    if increment:
        backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-"
                                     + str(int(increment) + 1))
    else:
        backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-1")

print "Backing up repository to '" + backup_subdir + "'..."
shutil.copytree(repo_dir, backup_subdir)
print "Done."

### Step 3: re-copy the Berkeley logfiles.  They must *always* be
### copied last.
# "db_archive -l" lists the database's logfile names; re-copying them
# after the bulk copytree ensures the backup's logs are at least as
# new as its data files.
infile, outfile, errfile = os.popen3(db_archive + " -l -h "
                                     + os.path.join(repo_dir, "db"))
stdout_lines = outfile.readlines()
stderr_lines = errfile.readlines()
outfile.close()
infile.close()
errfile.close()

print "Re-copying logfiles:"
for item in stdout_lines:
    logfile = string.strip(item)
    src = os.path.join(repo_dir, "db", logfile)
    dst = os.path.join(backup_subdir, "db", logfile)
    print " Re-copying logfile '" + logfile + "'..."
    shutil.copy(src, dst)

print "Backup completed."

### Step 4: put the archived database in a consistent state and remove
### the shared-memory environment files.
infile, outfile, errfile = os.popen3(db_recover + " -h "
                                     + os.path.join(backup_subdir, "db"))
stdout_lines = outfile.readlines()
stderr_lines = errfile.readlines()
outfile.close()
infile.close()
errfile.close()

# Echo whatever db_recover printed on stdout/stderr.
print "Running db_recover on the archived database:"
map(sys.stdout.write, stdout_lines)
map(sys.stdout.write, stderr_lines)

print "Done."

### Step 5: look for a write `lock' file in the backup area, else make one.
# NOTE(review): the lockfile name is "<repo>lock" with no separator
# (e.g. "myrepolock") -- looks odd; confirm this is intentional.
lockpath = os.path.join(backup_dir, repo + 'lock')
if os.path.exists(lockpath):
    # Another cleanup is (or was) in progress; skip log cleanup but
    # exit 0 since the backup itself succeeded.
    print "Cannot cleanup logs: lockfile already exists in", backup_dir
    sys.exit(0)

print "Writing lock for logfile cleanup..."
fp = open(lockpath, 'a')  # open in (a)ppend mode
fp.write("cleaning logfiles for repository " + repo_dir)
fp.close()

### Step 6: ask db_archive which of the live logfiles can be
### expunged, and remove them.
# "db_archive -a" prints absolute pathnames of logfiles that are no
# longer needed by the live database.
infile, outfile, errfile = os.popen3(db_archive + " -a -h "
                                     + os.path.join(repo_dir, "db"))
stdout_lines = outfile.readlines()
stderr_lines = errfile.readlines()
outfile.close()
infile.close()
errfile.close()

print "Cleaning obsolete logfiles:"
for item in stdout_lines:
    logfile = string.strip(item)
    print " Deleting '", logfile, "'..."
    os.unlink(logfile)

print "Done."

### Step 7: remove the write lock.
os.unlink(lockpath)
print "Lock removed. Cleanup complete."

### Step 8: finally, remove all repository backups other than the last
### NUM_BACKUPS.
if num_backups > 0:
    regexp = re.compile("^" + repo + "-[0-9]+(-[0-9]+)?$")
    directory_list = os.listdir(backup_dir)
    old_list = filter(lambda x: regexp.search(x), directory_list)
    old_list.sort(comparator)
    # Drop the newest num_backups entries from the (oldest-first)
    # list; whatever remains is surplus and gets deleted.
    del old_list[max(0,len(old_list)-num_backups):]
    for item in old_list:
        old_backup_subdir = os.path.join(backup_dir, item)
        print "Removing old backup: " + old_backup_subdir
        shutil.rmtree(old_backup_subdir)