| import argparse |
| import logging |
| import re |
| |
| import faulthandler |
| from pylons import c |
| from ming.orm import ThreadLocalORMSession |
| |
| from allura import model as M |
| from allura.lib.utils import chunked_find, chunked_list |
| |
| log = logging.getLogger(__name__) |
| |
| |
| def main(options): |
| q_project = {} |
| if options.nbhd: |
| nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd) |
| if not nbhd: |
| return "Invalid neighborhood url prefix." |
| q_project['neighborhood_id'] = nbhd._id |
| if options.project: |
| q_project['shortname'] = options.project |
| elif options.project_regex: |
| q_project['shortname'] = {'$regex': options.project_regex} |
| |
| log.info('Refreshing repositories') |
| if options.clean_all: |
| log.info('Removing all repository objects') |
| M.repo.CommitDoc.m.remove({}) |
| M.repo.TreeDoc.m.remove({}) |
| M.repo.TreesDoc.m.remove({}) |
| M.repo.DiffInfoDoc.m.remove({}) |
| M.repo.CommitRunDoc.m.remove({}) |
| |
| for chunk in chunked_find(M.Project, q_project): |
| for p in chunk: |
| log.info("Refreshing repos for project '%s'." % p.shortname) |
| if options.dry_run: |
| continue |
| c.project = p |
| if options.mount_point: |
| mount_points = [options.mount_point] |
| else: |
| mount_points = [ac.options.mount_point for ac in |
| M.AppConfig.query.find(dict(project_id=p._id))] |
| for app in (p.app_instance(mp) for mp in mount_points): |
| c.app = app |
| if not hasattr(app, 'repo'): |
| continue |
| if c.app.repo.tool.lower() not in options.repo_types: |
| log.info("Skipping %r: wrong type (%s)", c.app.repo, |
| c.app.repo.tool.lower()) |
| continue |
| |
| if options.clean: |
| ci_ids = list(c.app.repo.all_commit_ids()) |
| log.info("Deleting mongo data for %i commits...", len(ci_ids)) |
| tree_ids = [ |
| tree_id for doc in |
| M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}}, |
| {"tree_ids": 1}) |
| for tree_id in doc.get("tree_ids", [])] |
| |
| i = M.repo.CommitDoc.m.find({"_id": {"$in": ci_ids}}).count() |
| log.info("Deleting %i CommitDoc docs...", i) |
| M.repo.CommitDoc.m.remove({"_id": {"$in": ci_ids}}) |
| |
| # delete these in chunks, otherwise the query doc can |
| # exceed the max BSON size limit (16MB at the moment) |
| for tree_ids_chunk in chunked_list(tree_ids, 300000): |
| i = M.repo.TreeDoc.m.find({"_id": {"$in": tree_ids_chunk}}).count() |
| log.info("Deleting %i TreeDoc docs...", i) |
| M.repo.TreeDoc.m.remove({"_id": {"$in": tree_ids_chunk}}) |
| i = M.repo.LastCommitDoc.m.find({"object_id": {"$in": tree_ids_chunk}}).count() |
| log.info("Deleting %i LastCommitDoc docs...", i) |
| M.repo.LastCommitDoc.m.remove({"object_id": {"$in": tree_ids_chunk}}) |
| del tree_ids |
| |
| # delete these after TreeDoc and LastCommitDoc so that if |
| # we crash, we don't lose the ability to delete those |
| i = M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}}).count() |
| log.info("Deleting %i TreesDoc docs...", i) |
| M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}}) |
| |
| # delete LastCommitDocs for non-trees |
| repo_lastcommit_re = re.compile("^{}:".format(c.app.repo._id)) |
| i = M.repo.LastCommitDoc.m.find(dict(_id=repo_lastcommit_re)).count() |
| log.info("Deleting %i remaining LastCommitDoc docs, by repo id...", i) |
| M.repo.LastCommitDoc.m.remove(dict(_id=repo_lastcommit_re)) |
| |
| i = M.repo.DiffInfoDoc.m.find({"_id": {"$in": ci_ids}}).count() |
| log.info("Deleting %i DiffInfoDoc docs...", i) |
| M.repo.DiffInfoDoc.m.remove({"_id": {"$in": ci_ids}}) |
| |
| i = M.repo.CommitRunDoc.m.find({"commit_ids": {"$in": ci_ids}}).count() |
| log.info("Deleting %i CommitRunDoc docs...", i) |
| M.repo.CommitRunDoc.m.remove({"commit_ids": {"$in": ci_ids}}) |
| del ci_ids |
| |
| try: |
| if options.all: |
| log.info('Refreshing ALL commits in %r', c.app.repo) |
| else: |
| log.info('Refreshing NEW commits in %r', c.app.repo) |
| if options.profile: |
| import cProfile |
| cProfile.runctx('c.app.repo.refresh(options.all, notify=options.notify)', |
| globals(), locals(), 'refresh.profile') |
| else: |
| c.app.repo.refresh(options.all, notify=options.notify) |
| except: |
| log.exception('Error refreshing %r', c.app.repo) |
| ThreadLocalORMSession.flush_all() |
| ThreadLocalORMSession.close_all() |
| |
| |
| def repo_type_list(s): |
| repo_types = [] |
| for repo_type in s.split(','): |
| repo_type = repo_type.strip() |
| if repo_type not in ['svn', 'git', 'hg']: |
| raise argparse.ArgumentTypeError( |
| '{} is not a valid repo type.'.format(repo_type)) |
| repo_types.append(repo_type) |
| return repo_types |
| |
| |
| def parse_options(): |
| parser = argparse.ArgumentParser(description='Scan repos on filesytem and ' |
| 'update repo metadata in MongoDB. Run for all repos (no args), ' |
| 'or restrict by neighborhood, project, or code tool mount point.') |
| parser.add_argument('--nbhd', action='store', default='', dest='nbhd', |
| help='Restrict update to a particular neighborhood, e.g. /p/.') |
| parser.add_argument('--project', action='store', default='', dest='project', |
| help='Restrict update to a particular project. To specify a ' |
| 'subproject, use a slash: project/subproject.') |
| parser.add_argument('--project-regex', action='store', default='', |
| dest='project_regex', |
| help='Restrict update to projects for which the shortname matches ' |
| 'the provided regex.') |
| parser.add_argument('--repo-types', action='store', type=repo_type_list, |
| default=['svn', 'git', 'hg'], dest='repo_types', |
| help='Only refresh repos of the given type(s). Defaults to: ' |
| 'svn,git,hg. Example: --repo-types=git,hg') |
| parser.add_argument('--mount_point', default='', dest='mount_point', |
| help='Restrict update to repos at the given tool mount point. ') |
| parser.add_argument('--clean', action='store_true', dest='clean', |
| default=False, help='Remove repo-related mongo docs (for ' |
| 'project(s) being refreshed only) before doing the refresh.') |
| parser.add_argument('--clean-all', action='store_true', dest='clean_all', |
| default=False, help='Remove ALL repo-related mongo docs before ' |
| 'refresh.') |
| parser.add_argument('--all', action='store_true', dest='all', default=False, |
| help='Refresh all commits (not just the ones that are new).') |
| parser.add_argument('--notify', action='store_true', dest='notify', |
| default=False, help='Send email notifications of new commits.') |
| parser.add_argument('--dry-run', action='store_true', dest='dry_run', |
| default=False, help='Log names of projects that would have their ' |
| 'repos refreshed, but do not perform the actual refresh.') |
| parser.add_argument('--profile', action='store_true', dest='profile', |
| default=False, help='Enable the profiler (slow). Will log ' |
| 'profiling output to ./refresh.profile') |
| return parser.parse_args() |
| |
| if __name__ == '__main__': |
| import sys |
| faulthandler.enable() |
| sys.exit(main(parse_options())) |