contrib/server-side/backup-recipe.sh - subversion - Git at Google

 #!/bin/sh

 ###########################################################################
 #                                                                         #
 #  This shell script demonstrates a backup/restore recipe for live        #
 #  Subversion repositories, using a standard full+incrementals process.   #
 #                                                                         #
 #  This script is intended only as an example; the idea is that you       #
 #  can read over it, understand how it works (it's extensively commented) #
 #  and then implement real backup and restore scripts based on this       #
 #  recipe.                                                                #
 #                                                                         #
 #  To reiterate: this is *not* a backup and restore solution.  It's       #
 #  really just documentation, in the form of code with comments.          #
 #                                                                         #
 #  If you do implement your own scripts based on the recipe here, and     #
 #  your implementations are generic enough to be generally useful,        #
 #  please post them to dev@subversion.tigris.org.  It would be great if   #
 #  we could offer a real solution, and not just a description of one.     #
 #                                                                         #
 #  This recipe is distilled from the Berkeley DB documentation, see       #
 #  http://www.sleepycat.com/docs/ref/transapp/archival.html.              #
 #                                                                         #
 #  See also http://www.sleepycat.com/docs/ref/transapp/reclimit.html for  #
 #  possible problems using standard 'cp' in this recipe.                  #
 #                                                                         #
 ###########################################################################

 # High-level overview of the full backup recipe:
 #
 #    1. Ask BDB's db_archive for a list of unused log files.
 #
 #    2. Copy the entire db/ dir to the backup area.
 #
 #    3. Recopy all the logfiles to the backup area.  There may be more
 #       logfiles now than there were when step (1) ran.
 #
 #    4. Remove the logfiles listed as inactive in step (1) from the
 #       repository, though not from the backup.
 #
 # High-level overview of the incremental backup recipe:
 #
 #    1. Just copy the Berkeley logfiles to a backup area.
 #
 # High-level overview of the restoration recipe:
 #
 #    1. Copy all the datafiles and logfiles back to the repository, in
 #       the same order they were backed up.
 #
 #    2. Run Berkeley's "catastrophic recovery" command on the repository.
 #
 # That's it.  Here we go...

 # You might need to customize some of these paths.
 SVN=svn
 SVNADMIN=svnadmin
 SVNLOOK=svnlook
 # See http://www.sleepycat.com/docs/utility/db_archive.html:
 DB_ARCHIVE=/usr/local/BerkeleyDB.4.2/bin/db_archive
 # See http://www.sleepycat.com/docs/utility/db_recover.html:
 DB_RECOVER=/usr/local/BerkeleyDB.4.2/bin/db_recover

 # This is just source data to generate repository activity.
 # Any binary file of about 64k will do, it doesn't have to be /bin/ls.
 DATA_BLOB=/bin/ls

 # You shouldn't need to customize below here.
 # Everything the demo creates lives under ${SANDBOX}, which is wiped
 # and recreated on every run.
 SANDBOX=$(pwd)/backups-test-tmp
 FULL_BACKUPS=${SANDBOX}/full
 INCREMENTAL_PREFIX=${SANDBOX}/incremental-logs
 RECORDS=${SANDBOX}/records
 PROJ=myproj
 REPOS=${PROJ}-repos

 # Quote every expansion: the sandbox path is derived from the current
 # directory, which may contain whitespace, and an unquoted 'rm -rf'
 # would then delete unrelated paths.
 rm -rf "${SANDBOX}"
 mkdir "${SANDBOX}" || exit 1
 mkdir "${RECORDS}" || exit 1

 # All later relative paths (${REPOS}, wc, ../..) assume the sandbox is
 # the current directory, so give up if we cannot get there.
 cd "${SANDBOX}" || exit 1

 "${SVNADMIN}" create --bdb-log-keep "${REPOS}" || exit 1
 "${SVN}" co "file://${SANDBOX}/${REPOS}" wc || exit 1

 cd wc || exit 1

 # Put in enough data for us to exercise the logfiles.
 cp "${DATA_BLOB}" ./a1
 cp "${DATA_BLOB}" ./b1
 cp "${DATA_BLOB}" ./c1
 "${SVN}" -q add a1 b1 c1
 "${SVN}" -q ci -m "Initial add."

 echo "Created test data."

 cd .. || exit 1

 # Exercise the logfiles by moving data around a lot.  Note that we
 # avoid adds-with-history, since those cause much less Berkeley
 # activity than plain adds.
 #
 # May be called from any directory: it changes into ${SANDBOX}/wc
 # itself and returns to the caller's directory when it is done.
 # Pass one argument, a number, indicating how many cycles of exercise
 # you want.  The more cycles, the more logfiles will be generated.
 # The ratio is about two cycles per logfile.
 #
 # Globals read:    SANDBOX, SVN
 # Globals written: limit, saved_cwd, i (plain sh has no 'local')
 # Returns non-zero if the working copy cannot be entered, or if the
 # caller's directory cannot be re-entered afterwards.
 #
 # Defined with the portable name() syntax; the 'function' keyword is
 # not available in every /bin/sh.
 exercise()
 {
    limit=${1}

    saved_cwd=$(pwd)
    # Never shuffle files around in whatever directory we happen to be in.
    if ! cd "${SANDBOX}/wc"; then
      echo "exercise: cannot enter ${SANDBOX}/wc" >&2
      return 1
    fi

    echo ""
    i=1
    while [ "${i}" -le "${limit}" ]; do
      # Rename on disk, then tell Subversion about it as a delete plus a
      # plain add, so that no history is carried over.
      mv a1 a2
      mv b1 b2
      mv c1 c2
      "${SVN}" -q rm a1 b1 c1
      "${SVN}" -q add a2 b2 c2
      "${SVN}" -q ci -m "Move 1s to 2s, but not as cheap copies."

      mv a2 a1
      mv b2 b1
      mv c2 c1
      "${SVN}" -q rm a2 b2 c2
      "${SVN}" -q add a1 b1 c1
      "${SVN}" -q ci -m "Move 2s back to 1s, same way."

      echo "Exercising repository, pass ${i} of ${limit}."
      # Shell arithmetic; no need to spawn 'dc' for an increment.
      i=$((i + 1))
    done
    echo ""

    cd "${saved_cwd}" || return 1
 }

 # Generate some logfile activity.
 exercise 10

 # Do a full backup.
 head=$("${SVNLOOK}" youngest "${REPOS}")
 echo "Starting full backup (at r${head})..."
 mkdir "${FULL_BACKUPS}"
 mkdir "${FULL_BACKUPS}/${PROJ}"
 mkdir "${FULL_BACKUPS}/${PROJ}/repos"
 mkdir "${FULL_BACKUPS}/${PROJ}/logs"
 # Step 1: record the currently unused logfiles before copying anything.
 # The recorded names are fed to 'rm' from inside db/ further down, so
 # they are collected from that same directory.
 cd "${REPOS}/db" || exit 1
 "${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
 cd ../.. || exit 1
 # Step 2: copy the repository to the backup area.
 cp -a "${REPOS}" "${FULL_BACKUPS}/${PROJ}/repos/"
 # Step 3: recopy all the logfiles; there may be more now than in step 1.
 # A failed cd must stop the script, otherwise the 'rm' below would run
 # in the wrong directory.
 cd "${REPOS}/db" || exit 1
 # The command substitution is left unquoted on purpose: each word of
 # the db_archive output is one logfile name.
 for logfile in $("${DB_ARCHIVE}" -l); do
   # For maximum paranoia, we want repository activity *while* we're
   # making the full backup.
   exercise 5
   cp "${logfile}" "${FULL_BACKUPS}/${PROJ}/logs"
 done
 # Step 4: remove the logfiles recorded in step 1 from the repository
 # (the backup keeps its copies).
 xargs rm -f < "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
 cd ../.. || exit 1
 echo "Full backup completed (r${head} was head when started)."

 # Do the incremental backups for a nominal week.
 # Each "day" gets its own ${INCREMENTAL_PREFIX}-<day>/${PROJ} directory
 # holding a copy of the logfiles db_archive reports at that time.
 for day in 1 2 3 4 5 6; do
   exercise 5
   head=$("${SVNLOOK}" youngest "${REPOS}")
   echo "Starting incremental backup ${day} (at r${head})..."
   mkdir "${INCREMENTAL_PREFIX}-${day}"
   mkdir "${INCREMENTAL_PREFIX}-${day}/${PROJ}"
   # A failed cd must stop the script, otherwise the 'rm' at the end of
   # this pass would run in the wrong directory.
   cd "${REPOS}/db" || exit 1
   "${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
   # Unquoted on purpose: each word of the output is one logfile name.
   for logfile in $("${DB_ARCHIVE}" -l); do
     # For maximum paranoia, we want repository activity *while* we're
     # making the incremental backup.  But if we did commits with each
     # logfile copy, this script would be quite slow (Fibonacci effect).
     # So we only exercise on the last two "days" of incrementals.
     if [ "${day}" -ge 5 ]; then
       exercise 3
     fi
     cp "${logfile}" "${INCREMENTAL_PREFIX}-${day}/${PROJ}"
   done
   # Drop the logfiles that were already inactive when this pass began.
   xargs rm -f < "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
   cd ../.. || exit 1
   echo "Incremental backup ${day} done (r${head} was head when started)."
 done

 # The last revision a restoration is guaranteed to contain is whatever
 # was head at the start of the last incremental backup.
 last_guaranteed_rev=${head}

 # Make the repository vanish, so we can restore it.  The original is
 # kept under was_${REPOS} so the restored copy can be compared to it.
 mv "${REPOS}" "was_${REPOS}"

 echo ""
 # Kept on one line so the message is not printed with an embedded
 # line break.
 echo "Oliver Cromwell has destroyed the repository!  Restoration coming up..."
 echo ""

 # Restore.
 #
 # After copying the full repository backup over, we remove the shared
 # memory segments and the dav/* stuff.  Recovery recreates the shmem
 # segments, and anything in dav/* is certainly obsolete if we're doing
 # a restore.
 #
 # Note that we use db_recover instead of 'svnadmin recover'.  This is
 # because we want to pass the -c ('catastrophic') flag to db_recover.
 # As of Subversion 1.0.x, there is no '--catastrophic' flag to
 # 'svnadmin recover', unfortunately.
 #
 # Only the variable parts of each path are quoted; the trailing globs
 # must stay outside the quotes so that they still expand.
 cp -a "${FULL_BACKUPS}/${PROJ}/repos/${REPOS}" .
 cp -a "${FULL_BACKUPS}/${PROJ}/logs"/* "${REPOS}/db"
 rm -rf "${REPOS}"/db/__db*
 rm -rf "${REPOS}"/dav/*
 # db_recover is run from inside db/, so a failed cd has to be fatal.
 cd "${REPOS}/db" || exit 1
 "${DB_RECOVER}" -ce
 cd ../.. || exit 1
 head=$("${SVNLOOK}" youngest "${REPOS}")
 echo ""
 echo "(Restored from full backup to r${head}...)"
 # Replay the incrementals in the same order they were taken.
 for day in 1 2 3 4 5 6; do
   cd "${REPOS}/db" || exit 1
   cp "${INCREMENTAL_PREFIX}-${day}/${PROJ}"/* .
   "${DB_RECOVER}" -ce
   cd ../.. || exit 1
   head=$("${SVNLOOK}" youngest "${REPOS}")
   echo "(Restored from incremental-${day} to r${head}...)"
 done
 echo ""
 echo "Restoration complete.  All hail the King."

 # Verify the restoration.
 # Exits 1 as soon as any check fails, so callers can trust the exit
 # status of the script.
 was_head=$("${SVNLOOK}" youngest "was_${REPOS}")
 restored_head=$("${SVNLOOK}" youngest "${REPOS}")
 echo ""
 echo "Highest revision in original repository:  ${was_head}"
 echo "Highest revision restored:                ${restored_head}"
 echo ""
 echo "(It's okay if restored is less than original, even much less.)"

 # Without a head revision the comparisons below would have nothing to
 # compare and could pass vacuously.
 if [ -z "${restored_head}" ]; then
    echo ""
    echo "Restoration failed -- cannot read head of restored repository."
    exit 1
 fi

 if [ "${restored_head}" -lt "${last_guaranteed_rev}" ]; then
    echo ""
    echo "Restoration failed because r${restored_head} is too low --"
    echo "should have restored to at least r${last_guaranteed_rev}."
    exit 1
 fi

 # Looks like we restored at least to the minimum required revision.
 # Let's do some spot checks, though.

 echo ""
 echo "Comparing logs up to r${restored_head} for both repositories..."
 "${SVN}" log -v -r1:"${restored_head}" "file://$(pwd)/was_${REPOS}" > a
 "${SVN}" log -v -r1:"${restored_head}" "file://$(pwd)/${REPOS}"     > b
 if cmp a b; then
   echo "Done comparing logs."
 else
   echo "Log comparison failed -- restored repository is not right."
   exit 1
 fi

 echo ""
 echo "Comparing r${restored_head} exported trees from both repositories..."
 "${SVN}" -q export -r"${restored_head}" "file://$(pwd)/was_${REPOS}" orig-export
 "${SVN}" -q export -r"${restored_head}" "file://$(pwd)/${REPOS}" restored-export
 if diff -q -r orig-export restored-export; then
   echo "Done comparing r${restored_head} exported trees."
 else
   echo "Recursive diff failed -- restored repository is not right."
   # Fail like the log comparison does, instead of reporting "Done."
   exit 1
 fi

 echo ""
 echo "Done."
	#!/bin/sh

	###########################################################################
	# #
	# This shell script demonstrates a backup/restore recipe for live #
	# Subversion repositories, using a standard full+incrementals process. #
	# #
	# This script is intended only as an example; the idea is that you #
	# can read over it, understand how it works (it's extensively commented) #
	# and then implement real backup and restore scripts based on this #
	# recipe. #
	# #
	# To reiterate: this is *not* a backup and restore solution. It's #
	# really just documentation, in the form of code with comments. #
	# #
	# If you do implement your own scripts based on the recipe here, and #
	# your implementations are generic enough to be generally useful, #
	# please post them to dev@subversion.tigris.org. It would be great if #
	# we could offer a real solution, and not just a description of one. #
	# #
	# This recipe is distilled from the Berkeley DB documentation, see #
	# http://www.sleepycat.com/docs/ref/transapp/archival.html. #
	# #
	# See also http://www.sleepycat.com/docs/ref/transapp/reclimit.html for #
	# possible problems using standard 'cp' in this recipe. #
	# #
	###########################################################################

	# High-level overview of the full backup recipe:
	#
	# 1. Ask BDB's db_archive for a list of unused log files.
	#
	# 2. Copy the entire db/ dir to the backup area.
	#
	# 3. Recopy all the logfiles to the backup area. There may be more
	# logfiles now than there were when step (1) ran.
	#
	# 4. Remove the logfiles listed as inactive in step (1) from the
	# repository, though not from the backup.
	#
	# High-level overview of the incremental backup recipe:
	#
	# 1. Just copy the Berkeley logfiles to a backup area.
	#
	# High-level overview of the restoration recipe:
	#
	# 1. Copy all the datafiles and logfiles back to the repository, in
	# the same order they were backed up.
	#
	# 2. Run Berkeley's "catastrophic recovery" command on the repository.
	#
	# That's it. Here we go...

	# You might need to customize some of these paths.
	SVN=svn
	SVNADMIN=svnadmin
	SVNLOOK=svnlook
	# See http://www.sleepycat.com/docs/utility/db_archive.html:
	DB_ARCHIVE=/usr/local/BerkeleyDB.4.2/bin/db_archive
	# See http://www.sleepycat.com/docs/utility/db_recover.html:
	DB_RECOVER=/usr/local/BerkeleyDB.4.2/bin/db_recover

	# This is just source data to generate repository activity.
	# Any binary file of about 64k will do, it doesn't have to be /bin/ls.
	DATA_BLOB=/bin/ls

	# You shouldn't need to customize below here.
	# Everything the demo creates lives under ${SANDBOX}, which is wiped
	# and recreated on every run.
	SANDBOX=$(pwd)/backups-test-tmp
	FULL_BACKUPS=${SANDBOX}/full
	INCREMENTAL_PREFIX=${SANDBOX}/incremental-logs
	RECORDS=${SANDBOX}/records
	PROJ=myproj
	REPOS=${PROJ}-repos

	# Quote every expansion: the sandbox path is derived from the current
	# directory, which may contain whitespace, and an unquoted 'rm -rf'
	# would then delete unrelated paths.
	rm -rf "${SANDBOX}"
	mkdir "${SANDBOX}" || exit 1
	mkdir "${RECORDS}" || exit 1

	# All later relative paths (${REPOS}, wc, ../..) assume the sandbox is
	# the current directory, so give up if we cannot get there.
	cd "${SANDBOX}" || exit 1

	"${SVNADMIN}" create --bdb-log-keep "${REPOS}" || exit 1
	"${SVN}" co "file://${SANDBOX}/${REPOS}" wc || exit 1

	cd wc || exit 1

	# Put in enough data for us to exercise the logfiles.
	cp "${DATA_BLOB}" ./a1
	cp "${DATA_BLOB}" ./b1
	cp "${DATA_BLOB}" ./c1
	"${SVN}" -q add a1 b1 c1
	"${SVN}" -q ci -m "Initial add."

	echo "Created test data."

	cd .. || exit 1

	# Exercise the logfiles by moving data around a lot. Note that we
	# avoid adds-with-history, since those cause much less Berkeley
	# activity than plain adds.
	#
	# May be called from any directory: it changes into ${SANDBOX}/wc
	# itself and returns to the caller's directory when it is done.
	# Pass one argument, a number, indicating how many cycles of exercise
	# you want. The more cycles, the more logfiles will be generated.
	# The ratio is about two cycles per logfile.
	#
	# Globals read:    SANDBOX, SVN
	# Globals written: limit, saved_cwd, i (plain sh has no 'local')
	# Returns non-zero if the working copy cannot be entered, or if the
	# caller's directory cannot be re-entered afterwards.
	#
	# Defined with the portable name() syntax; the 'function' keyword is
	# not available in every /bin/sh.
	exercise()
	{
	  limit=${1}

	  saved_cwd=$(pwd)
	  # Never shuffle files around in whatever directory we happen to be in.
	  if ! cd "${SANDBOX}/wc"; then
	    echo "exercise: cannot enter ${SANDBOX}/wc" >&2
	    return 1
	  fi

	  echo ""
	  i=1
	  while [ "${i}" -le "${limit}" ]; do
	    # Rename on disk, then tell Subversion about it as a delete plus a
	    # plain add, so that no history is carried over.
	    mv a1 a2
	    mv b1 b2
	    mv c1 c2
	    "${SVN}" -q rm a1 b1 c1
	    "${SVN}" -q add a2 b2 c2
	    "${SVN}" -q ci -m "Move 1s to 2s, but not as cheap copies."

	    mv a2 a1
	    mv b2 b1
	    mv c2 c1
	    "${SVN}" -q rm a2 b2 c2
	    "${SVN}" -q add a1 b1 c1
	    "${SVN}" -q ci -m "Move 2s back to 1s, same way."

	    echo "Exercising repository, pass ${i} of ${limit}."
	    # Shell arithmetic; no need to spawn 'dc' for an increment.
	    i=$((i + 1))
	  done
	  echo ""

	  cd "${saved_cwd}" || return 1
	}

	# Generate some logfile activity.
	exercise 10

	# Do a full backup.
	head=$("${SVNLOOK}" youngest "${REPOS}")
	echo "Starting full backup (at r${head})..."
	mkdir "${FULL_BACKUPS}"
	mkdir "${FULL_BACKUPS}/${PROJ}"
	mkdir "${FULL_BACKUPS}/${PROJ}/repos"
	mkdir "${FULL_BACKUPS}/${PROJ}/logs"
	# Step 1: record the currently unused logfiles before copying anything.
	# The recorded names are fed to 'rm' from inside db/ further down, so
	# they are collected from that same directory.
	cd "${REPOS}/db" || exit 1
	"${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
	cd ../.. || exit 1
	# Step 2: copy the repository to the backup area.
	cp -a "${REPOS}" "${FULL_BACKUPS}/${PROJ}/repos/"
	# Step 3: recopy all the logfiles; there may be more now than in step 1.
	# A failed cd must stop the script, otherwise the 'rm' below would run
	# in the wrong directory.
	cd "${REPOS}/db" || exit 1
	# The command substitution is left unquoted on purpose: each word of
	# the db_archive output is one logfile name.
	for logfile in $("${DB_ARCHIVE}" -l); do
	  # For maximum paranoia, we want repository activity *while* we're
	  # making the full backup.
	  exercise 5
	  cp "${logfile}" "${FULL_BACKUPS}/${PROJ}/logs"
	done
	# Step 4: remove the logfiles recorded in step 1 from the repository
	# (the backup keeps its copies).  The list is fed to xargs on stdin;
	# an escaped '\|' would hand a literal '|' to cat instead of piping.
	xargs rm -f < "${RECORDS}/${PROJ}-full-backup-inactive-logfiles"
	cd ../.. || exit 1
	echo "Full backup completed (r${head} was head when started)."

	# Do the incremental backups for a nominal week.
	# Each "day" gets its own ${INCREMENTAL_PREFIX}-<day>/${PROJ} directory
	# holding a copy of the logfiles db_archive reports at that time.
	for day in 1 2 3 4 5 6; do
	  exercise 5
	  head=$("${SVNLOOK}" youngest "${REPOS}")
	  echo "Starting incremental backup ${day} (at r${head})..."
	  mkdir "${INCREMENTAL_PREFIX}-${day}"
	  mkdir "${INCREMENTAL_PREFIX}-${day}/${PROJ}"
	  # A failed cd must stop the script, otherwise the 'rm' at the end of
	  # this pass would run in the wrong directory.
	  cd "${REPOS}/db" || exit 1
	  "${DB_ARCHIVE}" > "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
	  # Unquoted on purpose: each word of the output is one logfile name.
	  for logfile in $("${DB_ARCHIVE}" -l); do
	    # For maximum paranoia, we want repository activity *while* we're
	    # making the incremental backup. But if we did commits with each
	    # logfile copy, this script would be quite slow (Fibonacci effect).
	    # So we only exercise on the last two "days" of incrementals.
	    if [ "${day}" -ge 5 ]; then
	      exercise 3
	    fi
	    cp "${logfile}" "${INCREMENTAL_PREFIX}-${day}/${PROJ}"
	  done
	  # Drop the logfiles that were already inactive when this pass began.
	  # The list is fed to xargs on stdin; an escaped '\|' would hand a
	  # literal '|' to cat instead of piping.
	  xargs rm -f < "${RECORDS}/${PROJ}-incr-backup-${day}-inactive-logfiles"
	  cd ../.. || exit 1
	  echo "Incremental backup ${day} done (r${head} was head when started)."
	done

	# The last revision a restoration is guaranteed to contain is whatever
	# was head at the start of the last incremental backup.
	last_guaranteed_rev=${head}

	# Make the repository vanish, so we can restore it. The original is
	# kept under was_${REPOS} so the restored copy can be compared to it.
	mv "${REPOS}" "was_${REPOS}"

	echo ""
	# Kept on one line so the message is not printed with an embedded
	# line break.
	echo "Oliver Cromwell has destroyed the repository! Restoration coming up..."
	echo ""

	# Restore.
	#
	# After copying the full repository backup over, we remove the shared
	# memory segments and the dav/* stuff. Recovery recreates the shmem
	# segments, and anything in dav/* is certainly obsolete if we're doing
	# a restore.
	#
	# Note that we use db_recover instead of 'svnadmin recover'. This is
	# because we want to pass the -c ('catastrophic') flag to db_recover.
	# As of Subversion 1.0.x, there is no '--catastrophic' flag to
	# 'svnadmin recover', unfortunately.
	#
	# Only the variable parts of each path are quoted; the trailing globs
	# must stay outside the quotes so that they still expand.
	cp -a "${FULL_BACKUPS}/${PROJ}/repos/${REPOS}" .
	cp -a "${FULL_BACKUPS}/${PROJ}/logs"/* "${REPOS}/db"
	rm -rf "${REPOS}"/db/__db*
	rm -rf "${REPOS}"/dav/*
	# db_recover is run from inside db/, so a failed cd has to be fatal.
	cd "${REPOS}/db" || exit 1
	"${DB_RECOVER}" -ce
	cd ../.. || exit 1
	head=$("${SVNLOOK}" youngest "${REPOS}")
	echo ""
	echo "(Restored from full backup to r${head}...)"
	# Replay the incrementals in the same order they were taken.
	for day in 1 2 3 4 5 6; do
	  cd "${REPOS}/db" || exit 1
	  cp "${INCREMENTAL_PREFIX}-${day}/${PROJ}"/* .
	  "${DB_RECOVER}" -ce
	  cd ../.. || exit 1
	  head=$("${SVNLOOK}" youngest "${REPOS}")
	  echo "(Restored from incremental-${day} to r${head}...)"
	done
	echo ""
	echo "Restoration complete. All hail the King."

	# Verify the restoration.
	# Exits 1 as soon as any check fails, so callers can trust the exit
	# status of the script.
	was_head=$("${SVNLOOK}" youngest "was_${REPOS}")
	restored_head=$("${SVNLOOK}" youngest "${REPOS}")
	echo ""
	echo "Highest revision in original repository: ${was_head}"
	echo "Highest revision restored: ${restored_head}"
	echo ""
	echo "(It's okay if restored is less than original, even much less.)"

	# Without a head revision the comparisons below would have nothing to
	# compare and could pass vacuously.
	if [ -z "${restored_head}" ]; then
	  echo ""
	  echo "Restoration failed -- cannot read head of restored repository."
	  exit 1
	fi

	if [ "${restored_head}" -lt "${last_guaranteed_rev}" ]; then
	  echo ""
	  echo "Restoration failed because r${restored_head} is too low --"
	  echo "should have restored to at least r${last_guaranteed_rev}."
	  exit 1
	fi

	# Looks like we restored at least to the minimum required revision.
	# Let's do some spot checks, though.

	echo ""
	echo "Comparing logs up to r${restored_head} for both repositories..."
	"${SVN}" log -v -r1:"${restored_head}" "file://$(pwd)/was_${REPOS}" > a
	"${SVN}" log -v -r1:"${restored_head}" "file://$(pwd)/${REPOS}" > b
	if cmp a b; then
	  echo "Done comparing logs."
	else
	  echo "Log comparison failed -- restored repository is not right."
	  exit 1
	fi

	echo ""
	echo "Comparing r${restored_head} exported trees from both repositories..."
	"${SVN}" -q export -r"${restored_head}" "file://$(pwd)/was_${REPOS}" orig-export
	"${SVN}" -q export -r"${restored_head}" "file://$(pwd)/${REPOS}" restored-export
	if diff -q -r orig-export restored-export; then
	  echo "Done comparing r${restored_head} exported trees."
	else
	  echo "Recursive diff failed -- restored repository is not right."
	  # Fail like the log comparison does, instead of reporting "Done."
	  exit 1
	fi

	echo ""
	echo "Done."