blob: 7de83a23576f2b9f0659fba4fe3fb6f81fb16275 [file] [log] [blame]
#!/usr/bin/env python
"""Python script to replicate and replay databases.
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
"""
import argparse
import time
import re
import couchdb.client
import functools
def retry(fn, retries):
    """Call ``fn`` and return its result, retrying when it raises.

    Args:
        fn: Zero-argument callable to invoke.
        retries: Number of additional attempts allowed after the first
            call fails. With ``0`` the callable is tried exactly once.

    Returns:
        Whatever ``fn`` returns on its first successful call.

    Raises:
        Exception: The error raised by the last attempt, once all retries
            are used up.
    """
    attemptsLeft = retries
    while True:
        try:
            return fn()
        except Exception:
            # Out of attempts: surface the most recent failure to the caller
            # instead of silently returning None.
            if attemptsLeft <= 0:
                raise
            attemptsLeft -= 1
def replicateDatabases(args):
    """Replicate databases.

    For every database on the source server whose name starts with
    ``args.dbPrefix`` and is not excluded, a replication document is stored
    in the source server's ``_replicator`` database. Afterwards, replication
    documents and backup databases whose embedded timestamp is older than
    ``args.expires`` seconds are deleted.

    Args:
        args: Parsed command-line namespace providing ``sourceDbUrl``,
            ``targetDbUrl``, ``dbPrefix``, ``expires``, ``continuous``,
            ``exclude`` and ``excludeBaseName``.
    """
    sourceDb = couchdb.client.Server(args.sourceDbUrl)
    targetDb = couchdb.client.Server(args.targetDbUrl)

    # Drop empty entries so that an unset option ("") excludes nothing.
    excludedDatabases = [x for x in args.exclude.split(",") if x != ""]
    excludedBaseNames = tuple(x for x in args.excludeBaseName.split(",") if x != "")

    # Create _replicator DB if it does not exist yet.
    if "_replicator" not in sourceDb:
        sourceDb.create("_replicator")

    replicator = sourceDb["_replicator"]

    now = int(time.time())
    backupPrefix = "backup_%d_" % now

    # Matches "backup_<timestamp>_<dbPrefix>..."; the prefix is escaped so
    # that it is compared literally rather than interpreted as a regex.
    backupPattern = re.compile(r"^backup_\d+_" + re.escape(args.dbPrefix))

    def isExcluded(dbName):
        """Return True if the database must not be backed up."""
        dbNameWithoutPrefix = dbName.replace(args.dbPrefix, "", 1)
        # is the database name in the list of excluded databases
        isNameExcluded = dbNameWithoutPrefix in excludedDatabases
        # if one of the basenames matches, the database is excluded
        # (str.startswith accepts a tuple; an empty tuple never matches)
        isBaseNameExcluded = dbNameWithoutPrefix.startswith(excludedBaseNames)
        return isNameExcluded or isBaseNameExcluded

    def isBackupDb(dbName):
        """Return True if the name looks like a timestamped backup."""
        return backupPattern.match(dbName) is not None

    def extractTimestamp(dbName):
        """Return the timestamp embedded in "backup_<timestamp>_..."."""
        return int(dbName.split("_")[1])

    def isExpired(timestamp):
        """Return True if the timestamp is older than args.expires seconds."""
        return now - args.expires > timestamp

    # Create backup of all databases with given prefix
    print("----- Create backups -----")
    for db in [dbName for dbName in sourceDb if dbName.startswith(args.dbPrefix) and not isExcluded(dbName)]:
        backupDb = backupPrefix + db if not args.continuous else 'continuous_' + db
        replicateDesignDocument = {
            "_id": backupDb,
            "source": args.sourceDbUrl + "/" + db,
            "target": args.targetDbUrl + "/" + backupDb,
            "create_target": True,
            "continuous": args.continuous,
        }
        print("create backup: %s" % backupDb)

        # For one-off snapshots, use the database's filter design document
        # (if it has one) to restrict what gets replicated.
        filterName = "snapshotFilters"
        filterDesignDocument = sourceDb[db].get("_design/%s" % filterName)
        if not args.continuous and filterDesignDocument:
            replicateDesignDocument["filter"] = "%s/withoutDeletedAndDesignDocuments" % filterName

        # Storing the document in _replicator is what starts the replication.
        replicator.save(replicateDesignDocument)

    # Delete all documents in the _replicator-database of old backups to avoid that they continue after they are deprecated
    print("----- Delete backup-documents older than %d seconds -----" % args.expires)
    expiredDocIds = [doc.id for doc in replicator.view('_all_docs', include_docs=True)
                     if isBackupDb(doc.id) and isExpired(extractTimestamp(doc.id))]
    for docId in expiredDocIds:
        print("deleting backup document: %s" % docId)
        # Get again the latest version of the document to delete the right revision and avoid Conflicts
        retry(lambda docId=docId: replicator.delete(replicator[docId]), 5)

    # Delete all backup-databases, that are older than specified
    print("----- Delete backups older than %d seconds -----" % args.expires)
    for db in [db for db in targetDb if isBackupDb(db) and isExpired(extractTimestamp(db))]:
        print("deleting backup: %s" % db)
        targetDb.delete(db)
def replayDatabases(args):
    """Replays databases.

    For every database on the source server whose name starts with
    ``args.dbPrefix``, a replication document is stored in the source
    server's ``_replicator`` database that copies it to the target server
    under the same name with the prefix removed.

    Args:
        args: Parsed command-line namespace providing ``sourceDbUrl``,
            ``targetDbUrl`` and ``dbPrefix``.
    """
    sourceDb = couchdb.client.Server(args.sourceDbUrl)

    # Create _replicator DB if it does not exist yet.
    if "_replicator" not in sourceDb:
        sourceDb.create("_replicator")

    replicator = sourceDb["_replicator"]

    for db in [dbName for dbName in sourceDb if dbName.startswith(args.dbPrefix)]:
        # Strip only the leading prefix; a later occurrence of the same text
        # inside the database name belongs to the name and must be kept.
        plainDbName = db[len(args.dbPrefix):]
        (identifier, _) = replicator.save({
            "source": args.sourceDbUrl + "/" + db,
            "target": args.targetDbUrl + "/" + plainDbName,
            "create_target": True
        })
        print("replaying backup: %s -> %s (%s)" % (db, plainDbName, identifier))
# Command-line interface: common connection options plus one sub-command
# ("replicate" or "replay"), each bound to the function that implements it.
parser = argparse.ArgumentParser(description="Utility to create a backup of all databases with the defined prefix.")
parser.add_argument("--sourceDbUrl", required=True, help="Server URL of the source database, that has to be backed up. E.g. 'https://user:password@host:5984'")
parser.add_argument("--targetDbUrl", required=True, help="Server URL of the target database, where the backup is stored. Like sourceDbUrl.")
subparsers = parser.add_subparsers(dest="command", help='sub-command help')
# Without a sub-command there is no handler to run, so make argparse reject
# such invocations with a usage error instead of failing later.
subparsers.required = True

# Replicate
replicateParser = subparsers.add_parser("replicate", help="Replicates source databases to the target database.")
replicateParser.add_argument("--dbPrefix", required=True, help="Prefix of the databases, that should be backed up.")
replicateParser.add_argument("--expires", required=True, type=int, help="Deletes all backups, that are older than the given value in seconds.")
replicateParser.add_argument("--continuous", action="store_true", help="Wether or not the backup should be continuous")
replicateParser.add_argument("--exclude", default="", help="Comma separated list of database names, that should not be backed up. (Without prefix).")
replicateParser.add_argument("--excludeBaseName", default="", help="Comma separated list of database base names. All databases, that have this basename in their name will not be backed up. (Without prefix).")
replicateParser.set_defaults(func=replicateDatabases)

# Replay
replayParser = subparsers.add_parser("replay", help="Replays source databases to the target database.")
replayParser.add_argument("--dbPrefix", required=True, help="Prefix of the databases, that should be replayed. Usually 'backup_{TIMESTAMP}_'")
replayParser.set_defaults(func=replayDatabases)

arguments = parser.parse_args()
# Run the handler registered by the selected sub-command.
arguments.func(arguments)