tools/copy-list.py - incubator-ponymail - Git at Google

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """ Copy lists

 This utility can be used to:
 - copy a list within a database
 - copy a list to a new database

 """

 import sys
 import time
 import argparse

 from elastic import Elastic

 sourceLID = None
 targetLID = None
 wildcard = None
 debug = False
 notag = False
 newdb = None

 # get config and set up default databas
 es = Elastic()
 # default database name
 dbname = es.getdbname()

 rootURL = ""

 parser = argparse.ArgumentParser(description='Command line options.')
 parser.add_argument('--source', dest='source', type=str, required=True,
                    metavar='<list id>', help='Source list to edit')
 parser.add_argument('--target', dest='target', type=str,
                    metavar='<list id>', help='(optional) new list ID')
 parser.add_argument('--newdb', dest='newdb', type=str,
                    metavar='<index name>', help='(optional) new ES database name')
 parser.add_argument('--wildcard', dest='glob', action='store_true',
                    help='Allow wildcards in --source')
 parser.add_argument('--notag', dest='notag', action='store_true',
                    help='List IDs do not have <> in them')

 args = parser.parse_args()

 sourceLID = args.source
 targetLID = args.target
 newdb = args.newdb
 wildcard = args.glob
 notag = args.notag

 if not (targetLID or newdb):
     print("Nothing to do! No target list ID or DB name specified")
     parser.print_help()
     sys.exit(-1)

 sourceLID = ("%s" if notag else "<%s>")  % sourceLID.replace("@", ".").strip("<>")
 if newdb and not targetLID:
     targetLID = sourceLID

 if targetLID:
     targetLID = "<%s>" % targetLID.replace("@", ".").strip("<>")

 if targetLID == sourceLID and not newdb:
     print("Nothing to do! Target same as source")
     parser.print_help()
     sys.exit(-1)

 print("Beginning list copy:")
 print("  - Source ID: %s" % sourceLID)
 if targetLID:
     print("  - Target ID: %s" % targetLID)
 if newdb:
     print("  - Target DB: %s" % newdb)
     if not es.indices.exists(newdb):
         print("Target database does not exist!")
         sys.exit(-1)

 count = 0


 print("Updating docs...")
 then = time.time()
 query = {
     'query': {
         'bool': {
             'must': [
                 {
                     'wildcard' if wildcard else 'term': {
                         'list_raw': sourceLID
                     }
                 }
             ]
         }
     }
 }
 js_arr = []
 for page in es.scan_and_scroll(body = query):
     sid = page['_scroll_id']
     for hit in page['hits']['hits']:
         doc = hit['_id']
         body = es.get(doc_type = 'mbox', id = doc)
         srcdoc = doc # save
         if targetLID != sourceLID:
             doc = hit['_id'].replace(sourceLID,targetLID)
             body['_source']['mid'] = doc
             body['_source']['list_raw'] = targetLID
             body['_source']['list'] = targetLID
         js_arr.append({
             '_op_type': 'index',
             '_index': newdb if newdb else dbname,
             '_type': 'mbox',
             '_id': doc,
             '_source': body['_source']
         })
         source = es.get(doc_type = 'mbox_source', id = srcdoc, ignore=404)
         if source['found']:
             js_arr.append({
                 '_op_type': 'index',
                 '_index': newdb if newdb else dbname,
                 '_type': 'mbox_source',
                 '_id': doc,
                 '_source': source['_source']
             })
         else:
             print("Source for %s not found, hmm..." % doc)

         count += 1
         if (count % 50 == 0):
             print("Processed %u emails..." % count)
             es.bulk(js_arr)
             js_arr = []

 if len(js_arr) > 0:
     es.bulk(js_arr)

 print("All done, processed %u docs in %u seconds" % (count, time.time() - then))
	#!/usr/bin/env python3
	# -- coding: utf-8 --
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	""" Copy lists

	This utility can be used to:
	- copy a list within a database
	- copy a list to a new database

	"""

	import sys
	import time
	import argparse

	from elastic import Elastic

	sourceLID = None
	targetLID = None
	wildcard = None
	debug = False
	notag = False
	newdb = None

	# get config and set up default databas
	es = Elastic()
	# default database name
	dbname = es.getdbname()

	rootURL = ""

	parser = argparse.ArgumentParser(description='Command line options.')
	parser.add_argument('--source', dest='source', type=str, required=True,
	metavar='<list id>', help='Source list to edit')
	parser.add_argument('--target', dest='target', type=str,
	metavar='<list id>', help='(optional) new list ID')
	parser.add_argument('--newdb', dest='newdb', type=str,
	metavar='<index name>', help='(optional) new ES database name')
	parser.add_argument('--wildcard', dest='glob', action='store_true',
	help='Allow wildcards in --source')
	parser.add_argument('--notag', dest='notag', action='store_true',
	help='List IDs do not have <> in them')

	args = parser.parse_args()

	sourceLID = args.source
	targetLID = args.target
	newdb = args.newdb
	wildcard = args.glob
	notag = args.notag

	if not (targetLID or newdb):
	print("Nothing to do! No target list ID or DB name specified")
	parser.print_help()
	sys.exit(-1)

	sourceLID = ("%s" if notag else "<%s>") % sourceLID.replace("@", ".").strip("<>")
	if newdb and not targetLID:
	targetLID = sourceLID

	if targetLID:
	targetLID = "<%s>" % targetLID.replace("@", ".").strip("<>")

	if targetLID == sourceLID and not newdb:
	print("Nothing to do! Target same as source")
	parser.print_help()
	sys.exit(-1)

	print("Beginning list copy:")
	print(" - Source ID: %s" % sourceLID)
	if targetLID:
	print(" - Target ID: %s" % targetLID)
	if newdb:
	print(" - Target DB: %s" % newdb)
	if not es.indices.exists(newdb):
	print("Target database does not exist!")
	sys.exit(-1)

	count = 0


	print("Updating docs...")
	then = time.time()
	query = {
	'query': {
	'bool': {
	'must': [
	{
	'wildcard' if wildcard else 'term': {
	'list_raw': sourceLID
	}
	}
	]
	}
	}
	}
	js_arr = []
	for page in es.scan_and_scroll(body = query):
	sid = page['_scroll_id']
	for hit in page['hits']['hits']:
	doc = hit['_id']
	body = es.get(doc_type = 'mbox', id = doc)
	srcdoc = doc # save
	if targetLID != sourceLID:
	doc = hit['_id'].replace(sourceLID,targetLID)
	body['_source']['mid'] = doc
	body['_source']['list_raw'] = targetLID
	body['_source']['list'] = targetLID
	js_arr.append({
	'_op_type': 'index',
	'_index': newdb if newdb else dbname,
	'_type': 'mbox',
	'_id': doc,
	'_source': body['_source']
	})
	source = es.get(doc_type = 'mbox_source', id = srcdoc, ignore=404)
	if source['found']:
	js_arr.append({
	'_op_type': 'index',
	'_index': newdb if newdb else dbname,
	'_type': 'mbox_source',
	'_id': doc,
	'_source': source['_source']
	})
	else:
	print("Source for %s not found, hmm..." % doc)

	count += 1
	if (count % 50 == 0):
	print("Processed %u emails..." % count)
	es.bulk(js_arr)
	js_arr = []

	if len(js_arr) > 0:
	es.bulk(js_arr)

	print("All done, processed %u docs in %u seconds" % (count, time.time() - then))