# blob: bf6d9058fdc82e4483e433b8095263221c8bd423 [file] [log] [blame]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Copy lists
This utility can be used to:
- copy a list within a database
- copy a list to a new database
"""
import sys
import time
import argparse
from elastic import Elastic
# Module-level settings. These are placeholders only: the real values are
# assigned from the parsed command-line arguments further down.
sourceLID = targetLID = wildcard = newdb = None
debug = notag = False

# Get config and set up the default database connection.
es = Elastic()

# Name of the default database, as reported by the Elastic wrapper.
dbname = es.getdbname()

rootURL = ""
# Command-line interface.
# Only --source is mandatory here; the validation that follows parsing
# additionally insists on at least one of --target / --newdb.
parser = argparse.ArgumentParser(description='Command line options.')
parser.add_argument(
    '--source', dest='source', type=str, required=True,
    metavar='<list id>',
    # This tool copies lists; the previous text ("... to edit") was wrong.
    help='Source list to copy')
parser.add_argument(
    '--target', dest='target', type=str,
    metavar='<list id>',
    help='(optional) new list ID')
parser.add_argument(
    '--newdb', dest='newdb', type=str,
    metavar='<index name>',
    help='(optional) new ES database name')
# Note: the --wildcard flag is stored under the attribute name 'glob'.
parser.add_argument(
    '--wildcard', dest='glob', action='store_true',
    help='Allow wildcards in --source')
parser.add_argument(
    '--notag', dest='notag', action='store_true',
    help='List IDs do not have <> in them')

# Parses sys.argv; argparse exits by itself on missing/invalid options.
args = parser.parse_args()
# Copy the parsed options into the module-level settings.
sourceLID, targetLID = args.source, args.target
newdb, wildcard, notag = args.newdb, args.glob, args.notag

# A destination is required: a new list ID, another database, or both.
if not targetLID and not newdb:
    print("Nothing to do! No target list ID or DB name specified")
    parser.print_help()
    sys.exit(-1)

# Normalise the source ID: '@' becomes '.', any surrounding angle brackets
# are stripped, and they are put back unless --notag was given.
source_template = "%s" if notag else "<%s>"
sourceLID = source_template % sourceLID.replace("@", ".").strip("<>")

# Copying into another database without renaming: reuse the source ID.
if newdb and not targetLID:
    targetLID = sourceLID

# The target ID gets the same '@' -> '.' treatment.
# NOTE(review): unlike the source ID, the target is always wrapped in <>,
# even with --notag - confirm that this asymmetry is intended.
if targetLID:
    targetLID = "<%s>" % targetLID.replace("@", ".").strip("<>")

# Same list ID inside the same database would be a no-op.
if not newdb and targetLID == sourceLID:
    print("Nothing to do! Target same as source")
    parser.print_help()
    sys.exit(-1)
# Tell the user what is about to happen: source first, then whichever
# destinations (list ID and/or database) are set.
summary_lines = ["Beginning list copy:", " - Source ID: %s" % sourceLID]
if targetLID:
    summary_lines.append(" - Target ID: %s" % targetLID)
if newdb:
    summary_lines.append(" - Target DB: %s" % newdb)
for summary_line in summary_lines:
    print(summary_line)

# Refuse to continue when the requested target database is not there.
if newdb and not es.indices.exists(newdb):
    print("Target database does not exist!")
    sys.exit(-1)
# Number of emails handled so far.
count = 0

print("Updating docs...")

# Start time, used for the elapsed-seconds figure in the final report.
then = time.time()

# Select documents by their 'list_raw' field: a 'wildcard' query when
# --wildcard was given, an exact 'term' match otherwise.
match_kind = 'wildcard' if wildcard else 'term'
list_filter = {match_kind: {'list_raw': sourceLID}}
query = {'query': {'bool': {'must': [list_filter]}}}
def _bulk_index_action(index, doc_type, doc_id, source):
    """Return one 'index' action dict in the shape passed to es.bulk()."""
    return {
        '_op_type': 'index',
        '_index': index,
        '_type': doc_type,
        '_id': doc_id,
        '_source': source,
    }


# Destination index: --newdb when given, otherwise the default database.
target_index = newdb or dbname
# IDs and list fields are only rewritten when the list ID actually changes.
rename = targetLID != sourceLID
# Emails processed between bulk flushes (and progress messages).
flush_every = 50

# Pending bulk actions, flushed every `flush_every` emails.
js_arr = []
for page in es.scan_and_scroll(body=query):
    for hit in page['hits']['hits']:
        # Keep the original ID: it is needed to fetch the mbox_source
        # document even when the copy is stored under a new ID.
        srcdoc = hit['_id']
        doc = srcdoc
        body = es.get(doc_type='mbox', id=srcdoc)
        if rename:
            # Swap the source list ID for the target one wherever it occurs
            # in the document ID (presumably the ID embeds the list ID -
            # TODO confirm), and update the list fields to match.
            doc = srcdoc.replace(sourceLID, targetLID)
            body['_source']['mid'] = doc
            body['_source']['list_raw'] = targetLID
            body['_source']['list'] = targetLID
        js_arr.append(
            _bulk_index_action(target_index, 'mbox', doc, body['_source']))

        # The matching mbox_source document may be absent; ignore=404 lets
        # the lookup report that via 'found' instead of raising.
        source = es.get(doc_type='mbox_source', id=srcdoc, ignore=404)
        if source['found']:
            js_arr.append(
                _bulk_index_action(
                    target_index, 'mbox_source', doc, source['_source']))
        else:
            print("Source for %s not found, hmm..." % doc)

        count += 1
        if count % flush_every == 0:
            print("Processed %u emails..." % count)
            es.bulk(js_arr)
            js_arr = []

# Flush whatever is left over from the last, partial batch.
if js_arr:
    es.bulk(js_arr)

print("All done, processed %u docs in %u seconds" % (count, time.time() - then))