# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import logging
from collections import defaultdict
from itertools import groupby
from operator import itemgetter

from ming.odm import ThreadLocalORMSession

from allura.scripts import ScriptTask
from allura import model as M

log = logging.getLogger(__name__)


class RemoveDuplicateTroves(ScriptTask):
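    """Remove duplicate TroveCategory documents.

    For each set of duplicates, keep the copy used by the most projects,
    repoint every other project at it, then delete the rest.
    """
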
trove_types = [
'trove_root_database',
'trove_developmentstatus',
'trove_audience',
'trove_license',
'trove_os',
'trove_language',
'trove_topic',
'trove_natlanguage',
'trove_environment',
    ]

    @classmethod
def execute(cls, options):
duplicates = cls._find_duplicates()
        log.info('Found %s duplicate categories: %s', len(duplicates), list(duplicates.keys()))
        for name, dups in duplicates.items():
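            # Map each duplicate's _id to the list of projects that use it.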
projects_with_category = {}
for dup in dups:
projects = cls._projects_with_category(dup._id)
projects_with_category[dup._id] = projects
log.info('Following projects are using category %s:', name)
            for _id, ps in projects_with_category.items():
log.info(' with id %s: %s', _id, [p.shortname for p in ps])
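            # Rank the duplicate ids by how many projects use them; the
            # most-used id survives, the rest are deleted.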
priority = [(i, len(ps)) for i, ps in projects_with_category.items()]
priority = sorted(priority, key=itemgetter(1), reverse=True)
priority = [p[0] for p in priority]
live, kill = priority[0], priority[1:]
            log.info('%s will live, %s will die', live, kill)
            if sum(len(projects_with_category[_id]) for _id in kill) > 0:
                # Duplicates are in use somewhere; need to reassign all
                # projects that use them to the surviving category.
projects = []
ids_to_kill = set(kill)
for p in [projects_with_category[_id] for _id in kill]:
projects.extend(p)
for p in projects:
for tt in cls.trove_types:
_ids = ids_to_kill.intersection(getattr(p, tt))
for _id in _ids:
log.info('Removing %s from %s.%s and adding %s instead', _id, p.shortname, tt, live)
if not options.dry_run:
getattr(p, tt).remove(_id)
getattr(p, tt).append(live)
log.info('Removing categories %s', kill)
if not options.dry_run:
M.TroveCategory.query.remove({'_id': {'$in': kill}})
                ThreadLocalORMSession.flush_all()

    @classmethod
def _find_duplicates(cls):
dups = []
agpl = M.TroveCategory.query.find({'shortname': 'agpl'}).all()
if len(agpl) > 1:
            # agpl is present twice with different cat_id values (an update
            # in the creation command updated only one of the duplicates),
            # so the exact-match scan below will not catch it.
dups.extend(agpl)
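        # A category is a duplicate if some other document matches it on
        # every identifying field.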
for cat in M.TroveCategory.query.find():
if M.TroveCategory.query.find({
'shortname': cat.shortname,
'trove_cat_id': cat.trove_cat_id,
'trove_parent_id': cat.trove_parent_id,
'fullname': cat.fullname,
'fullpath': cat.fullpath,
}).count() > 1:
dups.append(cat)
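        # Group the collected duplicates by shortname.  groupby only merges
        # adjacent items, but the defaultdict absorbs repeated keys, so dups
        # does not need to be sorted first.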
result = defaultdict(list)
for k, v in groupby(dups, lambda x: x.shortname):
result[k].extend(list(v))
        return result

    @classmethod
def _projects_with_category(cls, _id):
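        # One $or across every trove field matches any project that
        # references this category id anywhere.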
p = M.Project.query.find({'$or': [
{'trove_root_database': _id},
{'trove_developmentstatus': _id},
{'trove_audience': _id},
{'trove_license': _id},
{'trove_os': _id},
{'trove_language': _id},
{'trove_topic': _id},
{'trove_natlanguage': _id},
            {'trove_environment': _id},
]})
        return p.all()

    @classmethod
def parser(cls):
parser = argparse.ArgumentParser(description='Remove duplicate troves')
parser.add_argument('--dry-run', action='store_true', dest='dry_run',
default=False, help='Print what will be changed but do not change anything')
        return parser


if __name__ == '__main__':
RemoveDuplicateTroves.main()