| #!/usr/bin/env python |
| """Python script to replicate and replay databases. |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| """ |
| |
| |
| import argparse |
| import time |
| import re |
| import couchdb.client |
| import functools |
| |
def retry(fn, retries, delay=1):
    """Call ``fn`` and return its result, retrying when it raises.

    Args:
        fn: Zero-argument callable to invoke.
        retries: Number of additional attempts allowed after a failure.
            A value of 0 (or less) means ``fn`` is tried exactly once.
        delay: Seconds to sleep before each new attempt. Defaults to 1.

    Returns:
        The value returned by the first successful call of ``fn``.

    Raises:
        Exception: Whatever ``fn`` raised on its final attempt once all
            retries are used up.
    """
    while True:
        try:
            return fn()
        # Only retry ordinary errors: KeyboardInterrupt and SystemExit are
        # not subclasses of Exception and must stop the script immediately.
        except Exception:
            if retries <= 0:
                raise
            retries -= 1
            time.sleep(delay)
| |
| |
def replicateDatabases(args):
    """Back up all prefixed source databases and prune expired backups.

    For every database on the source server whose name starts with
    ``args.dbPrefix`` and is not excluded, a replication document is saved
    to the source server's ``_replicator`` database. Afterwards, replication
    documents and target databases named ``backup_<timestamp>_<prefix>...``
    whose timestamp is more than ``args.expires`` seconds old are deleted.

    Args:
        args: Parsed command line namespace. Reads ``sourceDbUrl``,
            ``targetDbUrl``, ``dbPrefix``, ``expires``, ``continuous``,
            ``exclude`` and ``excludeBaseName``.
    """
    sourceDb = couchdb.client.Server(args.sourceDbUrl)
    targetDb = couchdb.client.Server(args.targetDbUrl)

    # "".split(",") yields [""]; drop empty entries so that an unset option
    # excludes nothing (an empty name would otherwise match a database whose
    # name is exactly the prefix).
    excludedDatabases = {name for name in args.exclude.split(",") if name}
    excludedBaseNames = tuple(name for name in args.excludeBaseName.split(",") if name)

    # Create _replicator DB if it does not exist yet.
    if "_replicator" not in sourceDb:
        sourceDb.create("_replicator")

    replicator = sourceDb["_replicator"]

    now = int(time.time())
    backupPrefix = "backup_%d_" % now

    # Matches names produced with backupPrefix for this dbPrefix and captures
    # the timestamp. The prefix is escaped so it is always taken literally.
    backupNamePattern = re.compile(r"^backup_(\d+)_" + re.escape(args.dbPrefix))

    filterName = "snapshotFilters"

    def isExcluded(dbName):
        """Tell whether the database is excluded by name or by base name."""
        dbNameWithoutPrefix = dbName.replace(args.dbPrefix, "", 1)
        if dbNameWithoutPrefix in excludedDatabases:
            return True
        # str.startswith accepts a tuple; an empty tuple never matches.
        return dbNameWithoutPrefix.startswith(excludedBaseNames)

    def isExpiredBackup(name):
        """Tell whether ``name`` is a backup of this prefix that has expired."""
        match = backupNamePattern.match(name)
        if match is None:
            return False
        return now - args.expires > int(match.group(1))

    # Create backup of all databases with given prefix
    print("----- Create backups -----")
    databasesToBackup = [
        dbName for dbName in sourceDb
        if dbName.startswith(args.dbPrefix) and not isExcluded(dbName)
    ]
    for db in databasesToBackup:
        backupDb = "continuous_" + db if args.continuous else backupPrefix + db
        replicateDesignDocument = {
            "_id": backupDb,
            "source": args.sourceDbUrl + "/" + db,
            "target": args.targetDbUrl + "/" + backupDb,
            "create_target": True,
            "continuous": args.continuous,
        }
        print("create backup: %s" % backupDb)

        # One-off snapshots use the source database's filter when it ships
        # the corresponding design document; continuous backups never do.
        filterDesignDocument = sourceDb[db].get("_design/%s" % filterName)
        if not args.continuous and filterDesignDocument:
            replicateDesignDocument["filter"] = "%s/withoutDeletedAndDesignDocuments" % filterName
        replicator.save(replicateDesignDocument)

    # Delete all documents in the _replicator database that belong to expired
    # backups, so that their replications do not keep running.
    print("----- Delete backup-documents older than %d seconds -----" % args.expires)
    expiredDocs = [
        row for row in replicator.view('_all_docs', include_docs=True)
        if isExpiredBackup(row.id)
    ]
    for doc in expiredDocs:
        print("deleting backup document: %s" % doc.id)
        # Fetch the latest version of the document on every attempt so that
        # the current revision is deleted and conflicts are avoided.
        retry(lambda: replicator.delete(replicator[doc.id]), 5)

    # Delete all backup databases that are older than specified
    print("----- Delete backups older than %d seconds -----" % args.expires)
    for db in [name for name in targetDb if isExpiredBackup(name)]:
        print("deleting backup: %s" % db)
        targetDb.delete(db)
| |
| |
def replayDatabases(args):
    """Replay prefixed databases from the source server onto the target.

    For every database on the source server whose name starts with
    ``args.dbPrefix``, a replication document is saved to the source
    server's ``_replicator`` database. The target database name is the
    source name with the leading prefix removed.

    Args:
        args: Parsed command line namespace. Reads ``sourceDbUrl``,
            ``targetDbUrl`` and ``dbPrefix``.
    """
    sourceDb = couchdb.client.Server(args.sourceDbUrl)

    # Create _replicator DB if it does not exist yet.
    if "_replicator" not in sourceDb:
        sourceDb.create("_replicator")

    replicator = sourceDb["_replicator"]
    prefixLength = len(args.dbPrefix)

    for db in [dbName for dbName in sourceDb if dbName.startswith(args.dbPrefix)]:
        # Strip only the leading prefix; a later occurrence of the same text
        # inside the name belongs to the database name and must be kept.
        plainDbName = db[prefixLength:]
        (identifier, _) = replicator.save({
            "source": args.sourceDbUrl + "/" + db,
            "target": args.targetDbUrl + "/" + plainDbName,
            "create_target": True,
        })
        print("replaying backup: %s -> %s (%s)" % (db, plainDbName, identifier))
| |
def _buildParser():
    """Build the command line parser with its two sub-commands."""
    parser = argparse.ArgumentParser(description="Utility to create a backup of all databases with the defined prefix.")
    parser.add_argument("--sourceDbUrl", required=True, help="Server URL of the source database, that has to be backed up. E.g. 'https://xxx:yyy@domain.couch.com:443'")
    parser.add_argument("--targetDbUrl", required=True, help="Server URL of the target database, where the backup is stored. Like sourceDbUrl.")
    subparsers = parser.add_subparsers(dest="command", help='sub-command help')
    # Without a sub-command there is no ``func`` default to dispatch to, so
    # make argparse reject such invocations with a usage error.
    subparsers.required = True

    # Replicate
    replicateParser = subparsers.add_parser("replicate", help="Replicates source databases to the target database.")
    replicateParser.add_argument("--dbPrefix", required=True, help="Prefix of the databases, that should be backed up.")
    replicateParser.add_argument("--expires", required=True, type=int, help="Deletes all backups, that are older than the given value in seconds.")
    replicateParser.add_argument("--continuous", action="store_true", help="Whether or not the backup should be continuous")
    replicateParser.add_argument("--exclude", default="", help="Comma separated list of database names, that should not be backed up. (Without prefix).")
    replicateParser.add_argument("--excludeBaseName", default="", help="Comma separated list of database base names. All databases, that have this basename in their name will not be backed up. (Without prefix).")
    replicateParser.set_defaults(func=replicateDatabases)

    # Replay
    replayParser = subparsers.add_parser("replay", help="Replays source databases to the target database.")
    replayParser.add_argument("--dbPrefix", required=True, help="Prefix of the databases, that should be replayed. Usually 'backup_{TIMESTAMP}_'")
    replayParser.set_defaults(func=replayDatabases)

    return parser


def main(argv=None):
    """Parse the command line and run the selected sub-command.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    arguments = _buildParser().parse_args(argv)
    arguments.func(arguments)


# Only act when executed as a script, so importing the module has no effect.
if __name__ == "__main__":
    main()