#!/bin/sh

###########################################################################
#                                                                         #
#  This shell script demonstrates a backup/restore recipe for live        #
#  Subversion repositories, using a standard full+incrementals process.   #
#                                                                         #
#  This script is intended only as an example; the idea is that you       #
#  can read over it, understand how it works (it's extensively commented) #
#  and then implement real backup and restore scripts based on this       #
#  recipe.                                                                #
#                                                                         #
#  To reiterate: this is *not* a backup and restore solution. It's        #
#  really just documentation, in the form of code with comments.          #
#                                                                         #
#  If you do implement your own scripts based on the recipe here, and     #
#  your implementations are generic enough to be generally useful,        #
#  please post them to dev@subversion.tigris.org. It would be great if    #
#  we could offer a real solution, and not just a description of one.     #
#                                                                         #
#  This recipe is distilled from the Berkeley DB documentation, see       #
#  http://www.sleepycat.com/docs/ref/transapp/archival.html.              #
#                                                                         #
#  See also http://www.sleepycat.com/docs/ref/transapp/reclimit.html for  #
#  possible problems using standard 'cp' in this recipe.                  #
#                                                                         #
###########################################################################

# High-level overview of the full backup recipe:
#
#   1. Ask BDB's db_archive for a list of unused log files.
#
#   2. Copy the entire db/ dir to the backup area.
#
#   3. Recopy all the logfiles to the backup area. There may be more
#      logfiles now than there were when step (1) ran.
#
#   4. Remove the logfiles listed as inactive in step (1) from the
#      repository, though not from the backup.
#
# High-level overview of the incremental backup recipe:
#
#   1. Just copy the Berkeley logfiles to a backup area.
#
# High-level overview of the restoration recipe:
#
#   1. Copy all the datafiles and logfiles back to the repository, in
#      the same order they were backed up.
#
#   2. Run Berkeley's "catastrophic recovery" command on the repository.
#
# That's it. Here we go...

# You might need to customize some of these paths.
SVN=svn
SVNADMIN=svnadmin
SVNLOOK=svnlook
# See http://www.sleepycat.com/docs/utility/db_archive.html:
DB_ARCHIVE=/usr/local/BerkeleyDB.4.2/bin/db_archive
# See http://www.sleepycat.com/docs/utility/db_recover.html:
DB_RECOVER=/usr/local/BerkeleyDB.4.2/bin/db_recover

# This is just source data to generate repository activity.
# Any binary file of about 64k will do, it doesn't have to be /bin/ls.
DATA_BLOB=/bin/ls

# You shouldn't need to customize below here.
#
# Everything the script creates lives under SANDBOX, a scratch
# directory inside the directory the script was started from:
#   FULL_BACKUPS        - destination of the full backup
#   INCREMENTAL_PREFIX  - prefix of the per-day incremental backup dirs
#   RECORDS             - lists of inactive logfiles saved by each backup
SANDBOX=$(pwd)/backups-test-tmp
FULL_BACKUPS=${SANDBOX}/full
INCREMENTAL_PREFIX=${SANDBOX}/incremental-logs
RECORDS=${SANDBOX}/records
PROJ=myproj
REPOS=${PROJ}-repos

# Start from an empty sandbox.  The expansions are quoted so that a
# working directory containing whitespace cannot make 'rm -rf' act on
# unrelated paths, and ':?' aborts if SANDBOX is unset or empty.
rm -rf "${SANDBOX:?}" || exit 1
mkdir "${SANDBOX}" "${RECORDS}" || exit 1

# Every relative path used from here on is relative to the sandbox, so
# give up immediately if we cannot get into it.
cd "${SANDBOX}" || exit 1

# Create the repository and check out a working copy of it into wc/.
"${SVNADMIN}" create --bdb-log-keep "${REPOS}" || exit 1
"${SVN}" co "file://${SANDBOX}/${REPOS}" wc || exit 1

cd wc || exit 1

# Put in enough data for us to exercise the logfiles: three copies of
# DATA_BLOB, added and committed in a single revision.  This runs with
# the working copy as the current directory.
for blob_copy in a1 b1 c1; do
  cp "${DATA_BLOB}" "./${blob_copy}" || exit 1
done
"${SVN}" -q add a1 b1 c1 || exit 1
"${SVN}" -q ci -m "Initial add." || exit 1

echo "Created test data."

# Step back out of the working copy into its parent directory.
cd .. || exit 1

# Exercise the logfiles by moving data around a lot.  Note that we
# avoid adds-with-history, since those cause much less Berkeley
# activity than plain adds.
#
# This may be called from any directory: it changes into
# ${SANDBOX}/wc to do its work and returns to the caller's directory
# before it finishes.
#
# Pass one argument, a number, indicating how many cycles of exercise
# you want.  The more cycles, the more logfiles will be generated.
# The ratio is about two cycles per logfile.
#
# Globals:   SANDBOX, SVN (read); limit, saved_cwd, i (written -- they
#            are not declared local because 'local' is not POSIX sh)
# Arguments: $1 - number of cycles to run
# Outputs:   one progress line per cycle on stdout
# Returns:   1 if the working copy or the caller's directory cannot be
#            entered, 0 otherwise
exercise() {
  limit=${1}

  saved_cwd=$(pwd)
  # Never shuffle files around in whatever directory we happen to be in.
  cd "${SANDBOX}/wc" || return 1

  echo ""
  i=1
  while [ "${i}" -le "${limit}" ]; do
    # Rename on disk, then schedule the old names for deletion and the
    # new names as plain adds, so no history is carried over.
    mv a1 a2
    mv b1 b2
    mv c1 c2
    "${SVN}" -q rm a1 b1 c1
    "${SVN}" -q add a2 b2 c2
    "${SVN}" -q ci -m "Move 1s to 2s, but not as cheap copies."

    mv a2 a1
    mv b2 b1
    mv c2 c1
    "${SVN}" -q rm a2 b2 c2
    "${SVN}" -q add a1 b1 c1
    "${SVN}" -q ci -m "Move 2s back to 1s, same way."

    echo "Exercising repository, pass ${i} of ${limit}."
    i=$((i + 1))
  done
  echo ""

  cd "${saved_cwd}" || return 1
}

# Generate some logfile activity.
exercise 10

# Do a full backup.  This runs with the sandbox as the current
# directory and follows the four steps of the full backup recipe.
head=$("${SVNLOOK}" youngest "${REPOS}")
echo "Starting full backup (at r${head})..."
mkdir "${FULL_BACKUPS}" \
      "${FULL_BACKUPS}/${PROJ}" \
      "${FULL_BACKUPS}/${PROJ}/repos" \
      "${FULL_BACKUPS}/${PROJ}/logs" || exit 1

# Step 1: record the logfiles that are no longer in use.  db_archive
# is run from inside the db/ directory.
cd "${REPOS}/db" || exit 1
"${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-full-backup-inactive-logfiles" || exit 1
cd ../.. || exit 1

# Step 2: copy the whole repository to the backup area.
cp -a "${REPOS}" "${FULL_BACKUPS}/${PROJ}/repos/" || exit 1

# Step 3: recopy every logfile; there may be more now than in step 1.
cd "${REPOS}/db" || exit 1
for logfile in $("${DB_ARCHIVE}" -l); do
  # For maximum paranoia, we want repository activity *while* we're
  # making the full backup.
  exercise 5
  # A logfile that cannot be copied means the backup is incomplete, so
  # stop before step 4 deletes anything from the repository.
  cp "${logfile}" "${FULL_BACKUPS}/${PROJ}/logs" || exit 1
done

# Step 4: remove the logfiles recorded as inactive in step 1 from the
# repository (the names are relative to db/, where we still are).
xargs rm -f < "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
cd ../.. || exit 1
echo "Full backup completed (r${head} was head when started)."

# Do the incremental backups for a nominal week.  Each "day" gets its
# own directory, ${INCREMENTAL_PREFIX}-<day>/${PROJ}, holding a copy of
# the logfiles present at that time.
for day in 1 2 3 4 5 6; do
  exercise 5
  head=$("${SVNLOOK}" youngest "${REPOS}")
  echo "Starting incremental backup ${day} (at r${head})..."
  mkdir "${INCREMENTAL_PREFIX}-${day}" \
        "${INCREMENTAL_PREFIX}-${day}/${PROJ}" || exit 1
  cd "${REPOS}/db" || exit 1
  # Record the currently inactive logfiles before copying, so that only
  # files which have been backed up are removed afterwards.
  "${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles" \
    || exit 1
  for logfile in $("${DB_ARCHIVE}" -l); do
    # For maximum paranoia, we want repository activity *while* we're
    # making the incremental backup.  But if we did commits with each
    # logfile copy, this script would be quite slow (Fibonacci effect).
    # So we only exercise on the last two "days" of incrementals.
    if [ "${day}" -ge 5 ]; then
      exercise 3
    fi
    # Stop on a failed copy rather than go on to delete logfiles that
    # were never backed up.
    cp "${logfile}" "${INCREMENTAL_PREFIX}-${day}/${PROJ}" || exit 1
  done
  xargs rm -f < "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
  cd ../.. || exit 1
  echo "Incremental backup ${day} done (r${head} was head when started)."
done

# The last revision a restoration is guaranteed to contain is whatever
# was head at the start of the last incremental backup.
last_guaranteed_rev=${head}

# Make the repository vanish, so we can restore it.  The original is
# kept under the name was_${REPOS} so that the restored repository can
# be compared against it later.
mv "${REPOS}" "was_${REPOS}" || exit 1

echo ""
echo "Oliver Cromwell has destroyed the repository! Restoration coming up..."
echo ""

# Restore.
#
# After copying the full repository backup over, we remove the shared
# memory segments and the dav/* stuff.  Recovery recreates the shmem
# segments, and anything in dav/* is certainly obsolete if we're doing
# a restore.
#
# Note that we use db_recover instead of 'svnadmin recover'.  This is
# because we want to pass the -c ('catastrophic') flag to db_recover.
# As of Subversion 1.0.x, there is no '--catastrophic' flag to
# 'svnadmin recover', unfortunately.
#
# Every step is checked: carrying on after a failed copy, cd or
# recovery would only produce a misleading "restored" repository.
cp -a "${FULL_BACKUPS}/${PROJ}/repos/${REPOS}" . || exit 1
cp -a "${FULL_BACKUPS}/${PROJ}/logs"/* "${REPOS}/db" || exit 1
rm -rf "${REPOS}/db"/__db*
rm -rf "${REPOS}/dav"/*
cd "${REPOS}/db" || exit 1
"${DB_RECOVER}" -ce || exit 1
cd ../.. || exit 1
head=$("${SVNLOOK}" youngest "${REPOS}")
echo ""
echo "(Restored from full backup to r${head}...)"

# Replay the incrementals in the order they were taken, re-running
# recovery after each day's logfiles have been copied into db/.
for day in 1 2 3 4 5 6; do
  cd "${REPOS}/db" || exit 1
  cp "${INCREMENTAL_PREFIX}-${day}/${PROJ}"/* . || exit 1
  "${DB_RECOVER}" -ce || exit 1
  cd ../.. || exit 1
  head=$("${SVNLOOK}" youngest "${REPOS}")
  echo "(Restored from incremental-${day} to r${head}...)"
done
echo ""
echo "Restoration complete. All hail the King."

# Verify the restoration.  Any failed check ends the script with a
# non-zero exit status.
was_head=$("${SVNLOOK}" youngest "was_${REPOS}")
restored_head=$("${SVNLOOK}" youngest "${REPOS}")
echo ""
echo "Highest revision in original repository: ${was_head}"
echo "Highest revision restored: ${restored_head}"
echo ""
echo "(It's okay if restored is less than original, even much less.)"

# The numeric comparison below is meaningless (and '[' itself errors
# out) unless both values really are revision numbers.
for rev in "${restored_head}" "${last_guaranteed_rev}"; do
  case "${rev}" in
    '' | *[!0-9]*)
      echo ""
      echo "Restoration failed because a revision number could not be determined."
      exit 1
      ;;
  esac
done

if [ "${restored_head}" -lt "${last_guaranteed_rev}" ]; then
  echo ""
  echo "Restoration failed because r${restored_head} is too low --"
  echo "should have restored to at least r${last_guaranteed_rev}."
  exit 1
fi

# Looks like we restored at least to the minimum required revision.
# Let's do some spot checks, though.

# Spot check 1: the verbose logs of both repositories must be identical
# up to the restored head.  The logs are written to files 'a' and 'b'
# in the current directory.
echo ""
echo "Comparing logs up to r${restored_head} for both repositories..."
"${SVN}" log -v -r"1:${restored_head}" "file://$(pwd)/was_${REPOS}" > a
"${SVN}" log -v -r"1:${restored_head}" "file://$(pwd)/${REPOS}" > b
if cmp a b; then
  echo "Done comparing logs."
else
  echo "Log comparison failed -- restored repository is not right."
  exit 1
fi

# Spot check 2: trees exported at the restored head must be identical.
echo ""
echo "Comparing r${restored_head} exported trees from both repositories..."
"${SVN}" -q export -r"${restored_head}" "file://$(pwd)/was_${REPOS}" orig-export
"${SVN}" -q export -r"${restored_head}" "file://$(pwd)/${REPOS}" restored-export
if diff -q -r orig-export restored-export; then
  echo "Done comparing r${restored_head} exported trees."
else
  echo "Recursive diff failed -- restored repository is not right."
  # Fail like the log comparison does, instead of falling through to
  # "Done." with a zero exit status.
  exit 1
fi

echo ""
echo "Done."