#!/bin/sh
###########################################################################
# #
# This shell script demonstrates a backup/restore recipe for live #
# Subversion repositories, using a standard full+incrementals process. #
# #
# This script is intended only as an example; the idea is that you #
# can read over it, understand how it works (it's extensively commented) #
# and then implement real backup and restore scripts based on this #
# recipe. #
# #
# To reiterate: this is *not* a backup and restore solution. It's #
# really just documentation, in the form of code with comments. #
# #
# If you do implement your own scripts based on the recipe here, and #
# your implementations are generic enough to be generally useful, #
# please post them to dev@subversion.tigris.org. It would be great if #
# we could offer a real solution, and not just a description of one. #
# #
# This recipe is distilled from the Berkeley DB documentation, see #
# http://www.sleepycat.com/docs/ref/transapp/archival.html. #
# #
# See also http://www.sleepycat.com/docs/ref/transapp/reclimit.html #
# for possible problems using standard 'cp' in this recipe. #
# #
###########################################################################
# High-level overview of the full backup recipe:
#
# 1. Ask BDB's db_archive for a list of unused log files.
#
# 2. Copy the entire db/ dir to the backup area.
#
# 3. Recopy all the logfiles to the backup area. There may be more
# logfiles now than there were when step (1) ran.
#
# 4. Remove the logfiles listed as inactive in step (1) from the
# repository, though not from the backup.
#
# High-level overview of the incremental backup recipe:
#
# 1. Just copy the Berkeley logfiles to a backup area.
#
# High-level overview of the restoration recipe:
#
# 1. Copy all the datafiles and logfiles back to the repository, in
# the same order they were backed up.
#
# 2. Run Berkeley's "catastrophic recovery" command on the repository.
#
# That's it. Here we go...
# You might need to customize some of these paths.
SVN=svn
SVNADMIN=svnadmin
SVNLOOK=svnlook
# See http://www.sleepycat.com/docs/utility/db_archive.html:
DB_ARCHIVE=/usr/local/BerkeleyDB.4.2/bin/db_archive
# See http://www.sleepycat.com/docs/utility/db_recover.html:
DB_RECOVER=/usr/local/BerkeleyDB.4.2/bin/db_recover
# This is just source data to generate repository activity.
# Any binary file of about 64k will do, it doesn't have to be /bin/ls.
DATA_BLOB=/bin/ls
# You shouldn't need to customize below here.
SANDBOX=`pwd`/backups-test-tmp
FULL_BACKUPS=${SANDBOX}/full
INCREMENTAL_PREFIX=${SANDBOX}/incremental-logs
RECORDS=${SANDBOX}/records
PROJ=myproj
REPOS=${PROJ}-repos
rm -rf ${SANDBOX}
mkdir ${SANDBOX}
mkdir ${RECORDS}
cd ${SANDBOX}
${SVNADMIN} create --bdb-log-keep ${REPOS}
${SVN} co file://${SANDBOX}/${REPOS} wc
cd wc
# Put in enough data for us to exercise the logfiles.
cp ${DATA_BLOB} ./a1
cp ${DATA_BLOB} ./b1
cp ${DATA_BLOB} ./c1
${SVN} -q add a1 b1 c1
${SVN} -q ci -m "Initial add."
echo "Created test data."
cd ..
# Exercise the logfiles by moving data around a lot. Note that we
# avoid adds-with-history, since those cause much less Berkeley
# activity than plain adds.
#
# Call this from the parent of wc, that is, with $SANDBOX as CWD.
# Pass one argument, a number, indicating how many cycles of exercise
# you want. The more cycles, the more logfiles will be generated.
# The ratio is about two cycles per logfile.
function exercise
{
limit=${1}
saved_cwd=`pwd`
cd ${SANDBOX}/wc
echo ""
i=1
while [ ${i} -le ${limit} ]; do
mv a1 a2
mv b1 b2
mv c1 c2
${SVN} -q rm a1 b1 c1
${SVN} -q add a2 b2 c2
${SVN} -q ci -m "Move 1s to 2s, but not as cheap copies."
mv a2 a1
mv b2 b1
mv c2 c1
${SVN} -q rm a2 b2 c2
${SVN} -q add a1 b1 c1
${SVN} -q ci -m "Move 2s back to 1s, same way."
echo "Exercising repository, pass ${i} of ${limit}."
i=`dc -e "${i} 1 + p"`
done
echo ""
cd ${saved_cwd}
}
# Generate some logfile activity.
exercise 10
# Do a full backup.
head=`${SVNLOOK} youngest ${REPOS}`
echo "Starting full backup (at r${head})..."
mkdir ${FULL_BACKUPS}
mkdir ${FULL_BACKUPS}/${PROJ}
mkdir ${FULL_BACKUPS}/${PROJ}/repos
mkdir ${FULL_BACKUPS}/${PROJ}/logs
cd ${REPOS}/db
${DB_ARCHIVE} > ${RECORDS}/${PROJ}-full-backup-inactive-logfiles
cd ../..
cp -a ${REPOS} ${FULL_BACKUPS}/${PROJ}/repos/
cd ${REPOS}/db
for logfile in `${DB_ARCHIVE} -l`; do
# For maximum paranoia, we want repository activity *while* we're
# making the full backup.
exercise 5
cp ${logfile} ${FULL_BACKUPS}/${PROJ}/logs
done
cat ${RECORDS}/${PROJ}-full-backup-inactive-logfiles | xargs rm -f
cd ../..
echo "Full backup completed (r${head} was head when started)."
# Do the incremental backups for a nominal week.
for day in 1 2 3 4 5 6; do
exercise 5
head=`${SVNLOOK} youngest ${REPOS}`
echo "Starting incremental backup ${day} (at r${head})..."
mkdir ${INCREMENTAL_PREFIX}-${day}
mkdir ${INCREMENTAL_PREFIX}-${day}/${PROJ}
cd ${REPOS}/db
${DB_ARCHIVE} > ${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles
for logfile in `${DB_ARCHIVE} -l`; do
# For maximum paranoia, we want repository activity *while* we're
# making the incremental backup. But if we did commits with each
# logfile copy, this script would be quite slow (Fibonacci effect).
# So we only exercise on the last two "days" of incrementals.
if [ ${day} -ge 5 ]; then
exercise 3
fi
cp ${logfile} ${INCREMENTAL_PREFIX}-${day}/${PROJ}
done
cat ${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles | xargs rm -f
cd ../..
echo "Incremental backup ${day} done (r${head} was head when started)."
done
# The last revision a restoration is guaranteed to contain is whatever
# was head at the start of the last incremental backup.
last_guaranteed_rev=${head}
# Make the repository vanish, so we can restore it.
mv ${REPOS} was_${REPOS}
echo ""
echo "Oliver Cromwell has destroyed the repository! Restoration coming
up..."
echo ""
# Restore.
#
# After copying the full repository backup over, we remove the shared
# memory segments and the dav/* stuff. Recovery recreates the shmem
# segments, and anything in dav/* is certainly obsolete if we're doing
# a restore.
#
# Note that we use db_recover instead of 'svnadmin recover'. This is
# because we want to pass the -c ('catastrophic') flag to db_recover.
# As of Subversion 1.0.x, there is no '--catastrophic' flag to
# 'svnadmin recover', unfortunately.
cp -a ${FULL_BACKUPS}/${PROJ}/repos/${REPOS} .
cp -a ${FULL_BACKUPS}/${PROJ}/logs/* ${REPOS}/db
rm -rf ${REPOS}/db/__db*
rm -rf ${REPOS}/dav/*
cd ${REPOS}/db
${DB_RECOVER} -ce
cd ../..
head=`${SVNLOOK} youngest ${REPOS}`
echo ""
echo "(Restored from full backup to r${head}...)"
for day in 1 2 3 4 5 6; do
cd ${REPOS}/db
cp ${INCREMENTAL_PREFIX}-${day}/${PROJ}/* .
${DB_RECOVER} -ce
cd ../..
head=`${SVNLOOK} youngest ${REPOS}`
echo "(Restored from incremental-${day} to r${head}...)"
done
echo ""
echo "Restoration complete. All hail the King."
# Verify the restoration.
was_head=`${SVNLOOK} youngest was_${REPOS}`
restored_head=`${SVNLOOK} youngest ${REPOS}`
echo ""
echo "Highest revision in original repository: ${was_head}"
echo "Highest revision restored: ${restored_head}"
echo ""
echo "(It's okay if restored is less than original, even much less.)"
if [ ${restored_head} -lt ${last_guaranteed_rev} ]; then
echo ""
echo "Restoration failed because r${restored_head} is too low --"
echo "should have restored to at least r${last_guaranteed_rev}."
exit 1
fi
# Looks like we restored at least to the minimum required revision.
# Let's do some spot checks, though.
echo ""
echo "Comparing logs up to r${restored_head} for both repositories..."
${SVN} log -v -r1:${restored_head} file://`pwd`/was_${REPOS} > a
${SVN} log -v -r1:${restored_head} file://`pwd`/${REPOS} > b
if cmp a b; then
echo "Done comparing logs."
else
echo "Log comparison failed -- restored repository is not right."
exit 1
fi
echo ""
echo "Comparing r${restored_head} exported trees from both repositories..."
${SVN} -q export -r${restored_head} file://`pwd`/was_${REPOS} orig-export
${SVN} -q export -r${restored_head} file://`pwd`/${REPOS} restored-export
if diff -q -r orig-export restored-export; then
echo "Done comparing r${restored_head} exported trees."
else
echo "Recursive diff failed -- restored repository is not right."
fi
echo ""
echo "Done."