| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| from __future__ import annotations |
| import json |
| import os |
| import stat |
| from operator import itemgetter |
| import mimetypes |
| import logging |
| import string |
| import re |
| from subprocess import Popen, PIPE |
| from hashlib import sha1 |
| from datetime import datetime, timedelta |
| from time import time |
| from collections import defaultdict, OrderedDict |
| from six.moves.urllib.parse import urljoin |
| from threading import Thread |
| from six.moves.queue import Queue |
| from itertools import chain, islice |
| import typing |
| |
| import tg |
| from paste.deploy.converters import asint, asbool |
| from tg import tmpl_context as c |
| from tg import app_globals as g |
| import pymongo |
| import pymongo.errors |
| import bson |
| import six |
| |
| from ming import schema as S |
| from ming.orm import session |
| from ming import Field, collection, Index |
| from ming.utils import LazyProperty |
| from ming.odm import FieldProperty, session, Mapper, mapper, MappedClass, RelationProperty |
| from ming.base import Object |
| |
| from allura.lib import helpers as h |
| from allura.lib import utils |
| from allura.lib.security import has_access |
| |
| from .artifact import Artifact, VersionedArtifact |
| from .auth import User |
| from .timeline import ActivityObject |
| from .monq_model import MonQTask |
| from .project import AppConfig |
| from .session import main_doc_session |
| from .session import repository_orm_session |
| |
| if typing.TYPE_CHECKING: |
| from ming.odm.mapper import Query |
| |
| |
| log = logging.getLogger(__name__) |
| config = utils.ConfigProxy( |
| common_suffix='forgemail.domain', |
| ) |
| |
| README_RE = re.compile(r'^README(\.[^.]*)?$', re.IGNORECASE) |
| VIEWABLE_EXTENSIONS = frozenset([ |
| '.php', '.py', '.js', '.java', '.html', '.htm', '.yaml', '.sh', |
| '.rb', '.phtml', '.txt', '.bat', '.ps1', '.xhtml', '.css', '.cfm', '.jsp', '.jspx', |
| '.pl', '.php4', '.php3', '.rhtml', '.svg', '.markdown', '.json', '.ini', '.tcl', '.vbs', '.xsl']) |
| |
| |
| # Some schema types |
| SUser = dict(name=str, email=str, date=datetime) |
| SObjType = S.OneOf('blob', 'tree', 'submodule', 'symlink') |
| |
| # Used for when we're going to batch queries using $in |
| QSIZE = 100 |
| BINARY_EXTENSIONS = frozenset([ |
| ".3ds", ".3g2", ".3gp", ".7z", ".a", ".aac", ".adp", ".ai", ".aif", ".apk", ".ar", ".asf", ".au", ".avi", ".bak", |
| ".bin", ".bk", ".bmp", ".btif", ".bz2", ".cab", ".caf", ".cgm", ".cmx", ".cpio", ".cr2", ".dat", ".deb", ".djvu", |
| ".dll", ".dmg", ".dng", ".doc", ".docx", ".dra", ".DS_Store", ".dsk", ".dts", ".dtshd", ".dvb", ".dwg", ".dxf", |
| ".ecelp4800", ".ecelp7470", ".ecelp9600", ".egg", ".eol", ".eot", ".epub", ".exe", ".f4v", ".fbs", ".fh", ".fla", |
| ".flac", ".fli", ".flv", ".fpx", ".fst", ".fvt", ".g3", ".gif", ".gz", ".h261", ".h263", ".h264", ".ico", ".ief", |
| ".img", ".ipa", ".iso", ".jar", ".jpeg", ".jpg", ".jpgv", ".jpm", ".jxr", ".ktx", ".lvp", ".lz", ".lzma", ".lzo", |
| ".m3u", ".m4a", ".m4v", ".mar", ".mdi", ".mid", ".mj2", ".mka", ".mkv", ".mmr", ".mng", ".mov", ".movie", ".mp3", |
| ".mp4", ".mp4a", ".mpeg", ".mpg", ".mpga", ".mxu", ".nef", ".npx", ".o", ".oga", ".ogg", ".ogv", ".otf", ".pbm", |
| ".pcx", ".pdf", ".pea", ".pgm", ".pic", ".png", ".pnm", ".ppm", ".psd", ".pya", ".pyc", ".pyo", ".pyv", ".qt", |
| ".rar", ".ras", ".raw", ".rgb", ".rip", ".rlc", ".rz", ".s3m", ".s7z", ".scpt", ".sgi", ".shar", ".sil", ".smv", |
| ".so", ".sub", ".swf", ".tar", ".tbz2", ".tga", ".tgz", ".tif", ".tiff", ".tlz", ".ttf", ".uvh", ".uvi", |
| ".uvm", ".uvp", ".uvs", ".uvu", ".viv", ".vob", ".war", ".wav", ".wax", ".wbmp", ".wdp", ".weba", ".webm", ".webp", |
| ".whl", ".wm", ".wma", ".wmv", ".wmx", ".woff", ".woff2", ".wvx", ".xbm", ".xif", ".xm", ".xpi", ".xpm", ".xwd", |
| ".xz", ".z", ".zip", ".zipx" |
| ]) |
| |
| PYPELINE_EXTENSIONS = frozenset(utils.MARKDOWN_EXTENSIONS + ['.rst', '.textile', '.creole']) |
| |
| DIFF_SIMILARITY_THRESHOLD = .5 # used for determining file renames |
| |
| |
| class RepositoryImplementation: |
| |
| # Repository-specific code |
| def init(self): # pragma no cover |
| raise NotImplementedError('init') |
| |
| def clone_from(self, source_url): # pragma no cover |
| raise NotImplementedError('clone_from') |
| |
| def commit(self, revision): # pragma no cover |
| raise NotImplementedError('commit') |
| |
| def all_commit_ids(self): # pragma no cover |
| raise NotImplementedError('all_commit_ids') |
| |
| def new_commits(self, all_commits=False): # pragma no cover |
| '''Return a list of native commits in topological order (heads first). |
| |
| "commit" is a repo-native object, NOT a Commit object. |
| If all_commits is False, only return commits not already indexed. |
| ''' |
| raise NotImplementedError('new_commits') |
| |
| def commit_parents(self, commit): # pragma no cover |
| '''Return a list of native commits for the parents of the given (native) |
| commit''' |
| raise NotImplementedError('commit_parents') |
| |
| def refresh_commit_info(self, oid, seen, lazy=True): # pragma no cover |
| '''Refresh the data in the commit with id oid''' |
| raise NotImplementedError('refresh_commit_info') |
| |
| def _setup_hooks(self, source_path=None): # pragma no cover |
| '''Install a hook in the repository that will ping the refresh url for |
| the repo. Optionally provide a path from which to copy existing hooks.''' |
| raise NotImplementedError('_setup_hooks') |
| |
| # pragma no cover |
| def log(self, revs=None, path=None, exclude=None, id_only=True, **kw): |
| """ |
| Returns a generator that returns information about commits reachable |
| by revs. |
| |
| revs can be None or a list or tuple of identifiers, each of which |
| can be anything parsable by self.commit(). If revs is None, the |
| default branch head will be used. |
| |
| If path is not None, only commits which modify files under path |
| will be included. |
| |
| Exclude can be None or a list or tuple of identifiers, each of which |
| can be anything parsable by self.commit(). If not None, then any |
| revisions reachable by any of the revisions in exclude will not be |
| included. |
| |
| If id_only is True, returns only the commit ID (which can be faster), |
| otherwise it returns detailed information about each commit. |
| """ |
| raise NotImplementedError('log') |
| |
| def compute_tree_new(self, commit, path='/'): # pragma no cover |
| '''Used in hg and svn to compute a git-like-tree lazily with the new models''' |
| raise NotImplementedError('compute_tree') |
| |
| def open_blob(self, blob): # pragma no cover |
| '''Return a file-like object that contains the contents of the blob''' |
| raise NotImplementedError('open_blob') |
| |
| def blob_size(self, blob): |
| '''Return a blob size in bytes''' |
| raise NotImplementedError('blob_size') |
| |
| def tarball(self, revision, path=None): |
| '''Create a tarball for the revision''' |
| raise NotImplementedError('tarball') |
| |
| def is_empty(self): |
| '''Determine if the repository is empty by checking the filesystem''' |
| raise NotImplementedError('is_empty') |
| |
| def is_file(self, path, rev=None): |
| '''Determine if the repository is a file by checking the filesystem''' |
| raise NotImplementedError('is_file') |
| |
| @classmethod |
| def shorthand_for_commit(cls, oid): |
| return '[%s]' % oid[:6] |
| |
| def symbolics_for_commit(self, commit): |
| '''Return symbolic branch and tag names for a commit.''' |
| raise NotImplementedError('symbolics_for_commit') |
| |
| def url_for_commit(self, commit, url_type='ci'): |
| 'return an URL, given either a commit or object id' |
| if isinstance(commit, str): |
| object_id = commit |
| else: |
| object_id = commit._id |
| |
| if '/' in object_id: |
| object_id = os.path.join( |
| object_id, self._repo.app.END_OF_REF_ESCAPE) |
| |
| return os.path.join(self._repo.url(), url_type, object_id) + '/' |
| |
| def _setup_paths(self, create_repo_dir=True): |
| ''' |
| Ensure that the base directory in which the repo lives exists. |
| If create_repo_dir is True, also ensure that the directory |
| of the repo itself exists. |
| ''' |
| if not self._repo.fs_path.endswith('/'): |
| self._repo.fs_path += '/' |
| fullname = self._repo.fs_path + self._repo.name |
| # make the base dir for repo, regardless |
| if not os.path.exists(self._repo.fs_path): |
| os.makedirs(self._repo.fs_path) |
| if create_repo_dir and not os.path.exists(fullname): |
| os.mkdir(fullname) |
| return fullname |
| |
| def _setup_special_files(self, source_path=None): |
| magic_file = os.path.join( |
| self._repo.fs_path, self._repo.name, tg.config.get( |
| 'scm.magic_file', '.ALLURA-REPOSITORY')) |
| with open(magic_file, 'w') as f: |
| f.write(six.ensure_text(self._repo.repo_id)) |
| os.chmod(magic_file, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) |
| self._setup_hooks(source_path) |
| |
| @property |
| def head(self): |
| raise NotImplementedError('head') |
| |
| @property |
| def heads(self): |
| raise NotImplementedError('heads') |
| |
| @property |
| def branches(self): |
| raise NotImplementedError('branches') |
| |
| @property |
| def tags(self): |
| raise NotImplementedError('tags') |
| |
| def last_commit_ids(self, commit, paths): |
| ''' |
| Return a mapping {path: commit_id} of the _id of the last |
| commit to touch each path, starting from the given commit. |
| |
| Chunks the set of paths based on lcd_thread_chunk_size and |
| runs each chunk (if more than one) in a separate thread. |
| |
| Each thread will call :meth:`_get_last_commit` to get the |
| commit ID and list of changed files for the last commit |
| to touch any file in a given chunk. |
| ''' |
| if not paths: |
| return {} |
| timeout = float(tg.config.get('lcd_timeout', 60)) |
| start_time = time() |
| paths = list(set(paths)) # remove dupes |
| result = {} # will be appended to from each thread |
| chunks = Queue() |
| lcd_chunk_size = asint(tg.config.get('lcd_thread_chunk_size', 10)) |
| num_threads = 0 |
| for s in range(0, len(paths), lcd_chunk_size): |
| chunks.put(paths[s:s + lcd_chunk_size]) |
| num_threads += 1 |
| |
| def get_ids(): |
| paths = set(chunks.get()) |
| try: |
| commit_id = commit._id |
| while paths and commit_id: |
| if time() - start_time >= timeout: |
| log.error('last_commit_ids timeout for %s on %s', |
| commit._id, ', '.join(paths)) |
| break |
| commit_id, changes = self._get_last_commit( |
| commit._id, paths) |
| if commit_id is None: |
| break |
| changed = prefix_paths_union(paths, changes) |
| for path in changed: |
| result[path] = commit_id |
| paths -= changed |
| except Exception as e: |
| log.exception('Error in SCM thread: %s', e) |
| finally: |
| chunks.task_done() |
| if num_threads == 1: |
| get_ids() |
| else: |
| for i in range(num_threads): |
| t = Thread(target=get_ids) |
| t.start() |
| # reimplement chunks.join() but with a timeout |
| # see: http://bugs.python.org/issue9634 |
| # (giving threads a bit of extra cleanup time in case they timeout) |
| chunks.all_tasks_done.acquire() |
| try: |
| endtime = time() + timeout + 0.5 |
| while chunks.unfinished_tasks and endtime > time(): |
| chunks.all_tasks_done.wait(endtime - time()) |
| finally: |
| chunks.all_tasks_done.release() |
| return result |
| |
| def _get_last_commit(self, commit_id, paths): |
| """ |
| For a given commit ID and set of paths / files, |
| use the SCM to determine the last commit to touch |
| any of the given paths / files. |
| |
| Should return a tuple containing the ID of the |
| commit and the list of all files changed in the commit. |
| """ |
| raise NotImplementedError('_get_last_commit') |
| |
| def get_changes(self, commit_id): |
| """ |
| Return the list of files changed by a given commit. |
| """ |
| raise NotImplementedError('get_changes') |
| |
| def paged_diffs(self, commit_id, start=0, end=None, onlyChangedFiles=False): |
| """ |
| Returns files touched by the commit, grouped by status (added, removed, |
| and changed) and the total number of such files. Paginates according |
| to :param start: and :param end:. |
| """ |
| raise NotImplementedError('paged_diffs') |
| |
| def merge_request_commits(self, mr): |
| """Given MergeRequest :param mr: return list of commits to be merged""" |
| raise NotImplementedError('merge_request_commits') |
| |
| |
| class Repository(Artifact, ActivityObject): |
| BATCH_SIZE = 100 |
| |
| class __mongometa__: |
| name = 'generic-repository' |
| indexes = ['upstream_repo.name'] |
| |
| query: 'Query[Repository]' |
| |
| _impl = None |
| repo_id = 'repo' |
| type_s = 'Repository' |
| _refresh_precompute = True |
| |
| name = FieldProperty(str) |
| tool = FieldProperty(str) |
| fs_path = FieldProperty(str) |
| url_path = FieldProperty(str) |
| status = FieldProperty(str) |
| additional_viewable_extensions = FieldProperty(str) |
| heads = FieldProperty(S.Deprecated) |
| branches = FieldProperty(S.Deprecated) |
| repo_tags = FieldProperty(S.Deprecated) |
| upstream_repo = FieldProperty(dict(name=str, url=str)) |
| default_branch_name = FieldProperty(str) |
| cached_branches = FieldProperty([dict(name=str, object_id=str)]) |
| cached_tags = FieldProperty([dict(name=str, object_id=str)]) |
| |
| def __init__(self, **kw): |
| if 'name' in kw and 'tool' in kw: |
| if kw.get('fs_path') is None: |
| kw['fs_path'] = self.default_fs_path(c.project, kw['tool']) |
| if kw.get('url_path') is None: |
| kw['url_path'] = self.default_url_path(c.project, kw['tool']) |
| super().__init__(**kw) |
| |
| @property |
| def activity_name(self): |
| return 'repo %s' % self.name |
| |
| @classmethod |
| def default_fs_path(cls, project, tool): |
| repos_root = tg.config.get('scm.repos.root', '/') |
| # if a user-project, the repository path on disk needs to use the actual shortname |
| # the nice url might have invalid chars |
| return os.path.join(repos_root, tool, project.url(use_userproject_shortname=True)[1:]) |
| |
| @classmethod |
| def default_url_path(cls, project, tool): |
| # if a user-project, the repository checkout path needs to use the actual shortname used on disk |
| # not a nicer username (e.g. with underscores) used on web browsing urls |
| return project.url(use_userproject_shortname=True) |
| |
| @property |
| def tarball_path(self): |
| return os.path.join(tg.config.get('scm.repos.tarball.root', '/'), |
| self.tool, |
| self.project.shortname[:1], |
| self.project.shortname[:2], |
| self.project.shortname, |
| self.name) |
| |
| @property |
| def tarball_tmpdir(self): |
| return os.path.join(tg.config.get('scm.repos.tarball.tmpdir', tg.config.get('scm.repos.tarball.root', '/')), |
| self.tool, |
| self.project.shortname[:1], |
| self.project.shortname[:2], |
| self.project.shortname, |
| self.name) |
| |
| def tarball_filename(self, revision, path=None): |
| shortname = c.project.shortname.replace('/', '-') |
| mount_point = c.app.config.options.mount_point |
| filename = f'{shortname}-{mount_point}-{revision}' |
| return filename |
| |
| def tarball_url(self, revision, path=None): |
| filename = '{}{}'.format(self.tarball_filename(revision, path), '.zip') |
| r = os.path.join(self.tool, |
| self.project.shortname[:1], |
| self.project.shortname[:2], |
| self.project.shortname, |
| self.name, |
| filename) |
| return urljoin(tg.config.get('scm.repos.tarball.url_prefix', '/'), r) |
| |
| def get_tarball_status(self, revision, path=None): |
| pathname = os.path.join( |
| self.tarball_path, self.tarball_filename(revision, path)) |
| filename = '{}{}'.format(pathname, '.zip') |
| if os.path.isfile(filename.encode('utf-8')): |
| return 'complete' |
| |
| # file doesn't exist, check for busy task |
| task = MonQTask.query.get(**{ |
| 'task_name': 'allura.tasks.repo_tasks.tarball', |
| 'args': [revision, path or ''], |
| 'state': {'$in': ['busy', 'ready']}, |
| }) |
| |
| return task.state if task else None |
| |
| def __repr__(self): # pragma no cover |
| return '<{} {}>'.format( |
| self.__class__.__name__, |
| self.full_fs_path) |
| |
| # Proxy to _impl |
| def init(self): |
| return self._impl.init() |
| |
| def commit(self, rev): |
| return self._impl.commit(rev) |
| |
| def all_commit_ids(self): |
| return self._impl.all_commit_ids() |
| |
| def refresh_commit_info(self, oid, seen, lazy=True): |
| return self._impl.refresh_commit_info(oid, seen, lazy) |
| |
| def open_blob(self, blob): |
| return self._impl.open_blob(blob) |
| |
| def blob_size(self, blob): |
| return self._impl.blob_size(blob) |
| |
| def shorthand_for_commit(self, oid): |
| return self._impl.shorthand_for_commit(oid) |
| |
| def symbolics_for_commit(self, commit): |
| return self._impl.symbolics_for_commit(commit) |
| |
| def url_for_commit(self, commit, url_type='ci'): |
| return self._impl.url_for_commit(commit, url_type) |
| |
| def compute_tree_new(self, commit, path='/'): |
| return self._impl.compute_tree_new(commit, path) |
| |
| def last_commit_ids(self, commit, paths): |
| return self._impl.last_commit_ids(commit, paths) |
| |
| def get_changes(self, commit_id): |
| return self._impl.get_changes(commit_id) |
| |
| def is_empty(self): |
| return self._impl.is_empty() |
| |
| def is_file(self, path, rev=None): |
| return self._impl.is_file(path, rev) |
| |
| def get_heads(self): |
| """ |
| Return list of heads for the repo. |
| |
| It's get_heads() instead of a heads (lazy) property because it would |
| conflict with the now deprecated heads field. Eventually, we should |
| try to remove the deprecated fields and clean this up. |
| """ |
| return self._impl.heads |
| |
| def get_branches(self): |
| """ |
| Return list of branches for the repo. |
| |
| It's get_branches() instead of a branches (lazy) property because it |
| would conflict with the now deprecated branches field. Eventually, we |
| should try to remove the deprecated fields and clean this up. |
| """ |
| return self._impl.branches |
| |
| def get_tags(self, for_merge_request=False): |
| """ |
| Return list of tags for the repo. |
| |
| It's get_tags() instead of a tags (lazy) property because it |
| would conflict with the now deprecated tags field. Eventually, we |
| should try to remove the deprecated fields and clean this up. |
| """ |
| return self._impl.tags |
| |
| @property |
| def head(self): |
| return self._impl.head |
| |
| def set_default_branch(self, name): |
| return self._impl.set_default_branch(name) |
| |
| def paged_diffs(self, commit_id, start=0, end=None, onlyChangedFiles=False): |
| return self._impl.paged_diffs(commit_id, start, end, onlyChangedFiles) |
| |
| def init_as_clone(self, source_path, source_name, source_url): |
| self.upstream_repo.name = source_name |
| self.upstream_repo.url = source_url |
| session(self).flush(self) |
| source = source_path if source_path else source_url |
| self._impl.clone_from(source) |
| log.info('... %r cloned', self) |
| g.post_event('repo_cloned', source_url, source_path) |
| self.refresh(notify=False, new_clone=True) |
| |
| def log(self, revs=None, path=None, exclude=None, id_only=True, limit=None, **kw): |
| """ |
| Returns a generator that returns information about commits reachable |
| by revs which modify path. |
| |
| revs can either be a single revision identifier or a list or tuple |
| of identifiers, each of which can be anything parsable by self.commit(). |
| If revs is None, the default branch head will be used. |
| |
| If path is not None, then only commits which change files under path |
| will be included. |
| |
| Exclude can be None, a single revision identifier, or a list or tuple of |
| identifiers, each of which can be anything parsable by self.commit(). |
| If not None, then any revisions reachable by any of the revisions in |
| exclude will not be included. |
| |
| If id_only is True, returns only the commit ID (which can be faster), |
| otherwise it returns detailed information about each commit. |
| """ |
| if revs is not None and not isinstance(revs, (list, tuple)): |
| revs = [revs] |
| if exclude is not None and not isinstance(exclude, (list, tuple)): |
| exclude = [exclude] |
| log_iter = self._impl.log(revs, path, exclude=exclude, id_only=id_only, limit=limit, **kw) |
| return islice(log_iter, limit) |
| |
| def latest(self, branch=None): |
| if self._impl is None: |
| return None |
| if branch is None: |
| branch = self.app.default_branch_name |
| try: |
| return self.commit(branch) |
| except Exception: |
| log.exception('Cannot get latest commit for a branch: %s', branch) |
| return None |
| |
| def url(self): |
| return self.app_config.url() |
| |
| def refresh_url(self): |
| refresh_base_url = tg.config.get('scm.repos.refresh_base_url') or tg.config.get('base_url', 'http://localhost:8080') |
| return '/'.join([ |
| refresh_base_url.rstrip('/'), |
| 'auth/refresh_repo', |
| self.url().lstrip('/'), |
| ]) |
| |
| def shorthand_id(self): |
| return self.name |
| |
| @property |
| def email_address(self): |
| return f'noreply@{self.email_domain}{config.common_suffix}' |
| |
| def index(self): |
| result = Artifact.index(self) |
| result.update( |
| name_s=self.name, |
| type_s=self.type_s, |
| title=f'{self.project.name} {self.app.tool_label} repository') |
| return result |
| |
| @property |
| def full_fs_path(self): |
| return os.path.join(self.fs_path, self.name) |
| |
| def suggested_clone_dest_path(self): |
| owning_user = c.project.user_project_of |
| if owning_user: |
| projname = owning_user.username |
| else: |
| projname = c.project.shortname.replace('/', '-') |
| return f'{projname}-{self.name}' |
| |
| def clone_url(self, category, username=''): |
| '''Return a URL string suitable for copy/paste that describes _this_ repo, |
| e.g., for use in a clone/checkout command |
| ''' |
| if self.app.config.options.get('external_checkout_url', None): |
| tpl = string.Template(self.app.config.options.external_checkout_url) |
| else: |
| tpl = string.Template(tg.config.get(f'scm.host.{category}.{self.tool}')) |
| url = tpl.substitute(dict(username=username, path=self.url_path + self.name)) |
| # this is an svn option, but keeps clone_*() code from diverging |
| url += self.app.config.options.get('checkout_url', '') |
| return url |
| |
| def clone_url_first(self, anon, username=''): |
| ''' |
| Get first clone_url option, useful for places where we need to show just one |
| |
| :param bool anon: Anonymous or not |
| :param str username: optional |
| ''' |
| cat = self.clone_command_categories(anon=anon)[0]['key'] |
| return self.clone_url(cat, username) |
| |
| def clone_command(self, category, username=''): |
| '''Return a string suitable for copy/paste that would clone this repo locally |
| ''' |
| if not username and c.user not in (None, User.anonymous()): |
| username = c.user.username |
| tpl = string.Template(tg.config.get(f'scm.clone.{category}.{self.tool}') or |
| tg.config.get('scm.clone.%s' % self.tool)) |
| return tpl.substitute(dict(username=username, |
| source_url=self.clone_url(category, username), |
| dest_path=self.suggested_clone_dest_path())) |
| |
| def clone_command_first(self, anon, username=''): |
| ''' |
| Get first clone_command option, useful for places where we need to show just one |
| |
| :param bool anon: Anonymous or not |
| :param str username: optional |
| ''' |
| cat = self.clone_command_categories(anon=anon)[0]['key'] |
| return self.clone_command(cat, username) |
| |
| def clone_command_categories(self, anon): |
| conf = tg.config.get('scm.clonechoices{}.{}'.format('_anon' if anon else '', self.tool)) |
| if not conf and anon: |
| # check for a non-anon config |
| conf = tg.config.get(f'scm.clonechoices.{self.tool}') |
| if conf: |
| return json.loads(conf) |
| elif anon: |
| # defaults to match historical scm.clone.* configs, in case someone updates Allura but not their .ini |
| return [{"name": "RO", "key": "ro", "title": "Read Only"}, |
| {"name": "HTTPS", "key": "https_anon", "title": "HTTPS"}] |
| else: |
| return [{"name": "RW", "key": "rw", "title": "Read/Write"}, |
| {"name": "RO", "key": "ro", "title": "Read Only"}, |
| {"name": "HTTPS", "key": "https", "title": "HTTPS"}] |
| |
| def merge_requests_by_statuses(self, *statuses): |
| return MergeRequest.query.find(dict( |
| app_config_id=self.app.config._id, |
| status={'$in': statuses})).sort( |
| 'request_number') |
| |
| def all_merge_requests(self): |
| return MergeRequest.query.find(dict( |
| app_config_id=self.app.config._id)).sort( |
| 'request_number') |
| |
| @LazyProperty |
| def _additional_viewable_extensions(self): |
| ext_list = self.additional_viewable_extensions or '' |
| ext_list = [ext.strip() for ext in ext_list.split(',') if ext] |
| ext_list += ['.ini', '.gitignore', '.svnignore', 'README'] |
| return ext_list |
| |
| def guess_type(self, name): |
| '''Guess the mime type and encoding of a given filename''' |
| content_type, encoding = mimetypes.guess_type(name) |
| if content_type is None or not content_type.startswith('text/'): |
| fn, ext = os.path.splitext(name) |
| ext = ext or fn |
| if ext in self._additional_viewable_extensions: |
| content_type, encoding = 'text/plain', None |
| if content_type is None: |
| content_type, encoding = 'application/octet-stream', None |
| return content_type, encoding |
| |
| def unknown_commit_ids(self): |
| from allura.model.repo_refresh import unknown_commit_ids as unknown_commit_ids_repo |
| return unknown_commit_ids_repo(self.all_commit_ids()) |
| |
| def refresh(self, all_commits=False, notify=True, new_clone=False, commits_are_new=None): |
| '''Find any new commits in the repository and update''' |
| try: |
| from allura.model.repo_refresh import refresh_repo |
| log.info('... %r analyzing', self) |
| self.set_status('analyzing') |
| refresh_repo(self, all_commits, notify, new_clone, commits_are_new) |
| finally: |
| log.info('... %s ready', self) |
| self.set_status('ready') |
| |
| def push_upstream_context(self): |
| project, rest = h.find_project(self.upstream_repo.name) |
| with h.push_context(project._id): |
| app = project.app_instance(rest[0]) |
| return h.push_context(project._id, app_config_id=app.config._id) |
| |
| def pending_upstream_merges(self): |
| q = { |
| 'downstream.project_id': self.project_id, |
| 'downstream.mount_point': self.app.config.options.mount_point, |
| 'status': 'open'} |
| with self.push_upstream_context(): |
| return MergeRequest.query.find(q).count() |
| |
| @property |
| def forks(self): |
| all_forks = self.query.find({'upstream_repo.name': self.url()}).all() |
| return [fork for fork in all_forks if fork.app_config is not None |
| and fork.app_config.project is not None] |
| |
| def tarball(self, revision, path=None): |
| if path: |
| path = path.strip('/') |
| self._impl.tarball(revision, path) |
| |
| def rev_to_commit_id(self, rev): |
| raise NotImplementedError('rev_to_commit_id') |
| |
| def set_status(self, status): |
| ''' |
| Update (and flush) the repo status indicator. |
| |
| Updates to the repo status (or any Repository field) are considered |
| project updates (because Repositories are Artifacts; see |
| `Artifact.__mongometa__.before_save`) and thus change `last_updated` |
| on `c.project`, which causes `c.project` to be flushed. |
| |
| Because repo status changes can come at the end or middle of a long |
| operation, `c.project` can be quite stale, so this flushes and reloads |
| `c.project`. |
| ''' |
| from allura.model import Project |
| project_session = session(c.project) |
| if project_session: |
| session(c.project).flush(c.project) |
| session(c.project).expunge(c.project) |
| c.project = Project.query.get(_id=c.project._id) |
| self.status = status |
| session(self).flush(self) |
| |
| def get_default_branch(self, default_branch_name): |
| branch_name = getattr(self, 'default_branch_name', None) or default_branch_name |
| branches = [] |
| if not self.is_empty(): |
| branches = [b.name for b in self.get_branches()] |
| |
| if branches and branch_name not in branches: |
| if default_branch_name in branches: |
| branch_name = default_branch_name |
| else: |
| branch_name = branches[0] |
| self.set_default_branch(branch_name) |
| return branch_name |
| |
| def merge_request_commits(self, mr): |
| """Given MergeRequest :param mr: return list of commits to be merged""" |
| return self._impl.merge_request_commits(mr) |
| |
| |
| class MergeRequest(VersionedArtifact, ActivityObject): |
| statuses = ['open', 'merged', 'rejected'] |
| |
| class __mongometa__: |
| name = 'merge-request' |
| indexes = ['commit_id', 'creator_id'] |
| unique_indexes = [('app_config_id', 'request_number')] |
| |
| query: 'Query[MergeRequest]' |
| |
| type_s = 'MergeRequest' |
| |
| request_number = FieldProperty(int) |
| status = FieldProperty(str, if_missing='open') |
| downstream = FieldProperty(dict( |
| project_id=S.ObjectId, |
| mount_point=str, |
| commit_id=str)) |
| source_branch = FieldProperty(str, if_missing='') |
| target_branch = FieldProperty(str) |
| creator_id = FieldProperty(S.ObjectId, if_missing=lambda: c.user._id) |
| created = FieldProperty(datetime, if_missing=datetime.utcnow) |
| summary = FieldProperty(str) |
| description = FieldProperty(str) |
| can_merge_cache = FieldProperty({str: bool}) |
| new_commits = FieldProperty([S.Anything], if_missing=None) # don't access directly, use `commits` property |
| |
| @property |
| def activity_name(self): |
| return 'merge request #%s' % self.request_number |
| |
| @property |
| def type_name(self): |
| return 'merge request' |
| |
| @property |
| def activity_extras(self): |
| d = ActivityObject.activity_extras.fget(self) |
| d.update(summary=self.summary) |
| return d |
| |
| @LazyProperty |
| def creator(self): |
| from allura import model as M |
| return M.User.query.get(_id=self.creator_id) |
| |
| @LazyProperty |
| def creator_name(self): |
| return self.creator.get_pref('display_name') or self.creator.username |
| |
| @LazyProperty |
| def creator_url(self): |
| return self.creator.url() |
| |
| @LazyProperty |
| def downstream_url(self): |
| with self.push_downstream_context(): |
| return c.app.url |
| |
| @LazyProperty |
| def downstream_repo(self): |
| with self.push_downstream_context(): |
| return c.app.repo |
| |
| def push_downstream_context(self): |
| return h.push_context(self.downstream.project_id, self.downstream.mount_point) |
| |
| @property |
| def commits(self): |
| if self.new_commits is not None: |
| return self.new_commits |
| |
| with self.push_downstream_context(): |
| # update the cache key only, being careful not to touch anything else that ming will try to flush later |
| # this avoids race conditions with the `set_can_merge_cache()` caching and clobbering fields |
| new_commits = c.app.repo.merge_request_commits(self) |
| self.query.update({'$set': {'new_commits': new_commits}}) |
| return new_commits |
| |
| @classmethod |
| def upsert(cls, **kw): |
| num = cls.query.find(dict( |
| app_config_id=c.app.config._id)).count() + 1 |
| while True: |
| try: |
| r = cls(request_number=num, **kw) |
| session(r).flush(r) |
| return r |
| except pymongo.errors.DuplicateKeyError: # pragma no cover |
| session(r).expunge(r) |
| num += 1 |
| |
| def url(self): |
| return self.app.url + 'merge-requests/%s/' % self.request_number |
| |
| def index(self): |
| result = Artifact.index(self) |
| result.update( |
| name_s='Merge Request #%d' % self.request_number, |
| type_s=self.type_s, |
| title='Merge Request #%d: %s' % (self.request_number, self.summary), |
| ) |
| return result |
| |
| @property |
| def email_subject(self): |
| return 'Merge request: ' + self.summary |
| |
| def merge_allowed(self, user): |
| """ |
| Returns true if a merge is allowed by system and tool configuration. |
| """ |
| if not self.app.forkable: |
| return False |
| if self.status != 'open': |
| return False |
| if asbool(tg.config.get(f'scm.merge.{self.app.config.tool_name}.disabled')): |
| return False |
| if not h.has_access(self.app, 'write', user): |
| return False |
| if self.app.config.options.get('merge_disabled'): |
| return False |
| return True |
| |
| def can_merge_cache_key(self): |
| """ |
| Returns key for can_merge_cache constructed from current |
| source & target branch commits. |
| """ |
| source_hash = self.downstream.commit_id |
| target_hash = self.app.repo.commit(self.target_branch)._id |
| key = f'{source_hash}-{target_hash}' |
| return key |
| |
| def get_can_merge_cache(self): |
| """Returns True/False or None in case of cache miss.""" |
| key = self.can_merge_cache_key() |
| return self.can_merge_cache.get(key) |
| |
| def set_can_merge_cache(self, val): |
| key = self.can_merge_cache_key() |
| # update the cache key only, being careful not to touch anything else that ming will try to flush later |
| # this avoids race conditions with the `commits()` caching and clobbering fields |
| can_merge_cache = self.can_merge_cache._deinstrument() |
| can_merge_cache[key] = val |
| self.query.update({'$set': {'can_merge_cache': can_merge_cache}}) |
| |
| def can_merge(self): |
| """ |
| Returns boolean indicating if automatic merge is possible (no |
| conflicts). If result is unknown yet, returns None and fires a task to |
| get the result. Caches result for later reuse. |
| """ |
| if not self.merge_allowed(c.user): |
| return None |
| if self.status == 'merged': |
| return True |
| cached = self.get_can_merge_cache() |
| if cached is not None: |
| return cached |
| in_progress = self.can_merge_task_status() in ['ready', 'busy'] |
| if self.app.forkable and not in_progress: |
| from allura.tasks import repo_tasks |
| repo_tasks.can_merge.post(self._id) |
| |
| def merge(self): |
| in_progress = self.merge_task_status() in ['ready', 'busy'] |
| if self.app.forkable and not in_progress: |
| from allura.tasks import repo_tasks |
| repo_tasks.merge.post(self._id) |
| |
| def _task_status(self, task_name): |
| task = MonQTask.query.find({ |
| 'state': {'$in': ['busy', 'complete', 'error', 'ready']}, # needed to use index |
| 'task_name': task_name, |
| 'args': [self._id], |
| 'time_queue': {'$gt': datetime.utcnow() - timedelta(days=1)}, # constrain on index further |
| }).sort('_id', -1).limit(1).first() |
| if task: |
| return task.state |
| return None |
| |
| def merge_task_status(self): |
| return self._task_status('allura.tasks.repo_tasks.merge') |
| |
| def can_merge_task_status(self): |
| return self._task_status('allura.tasks.repo_tasks.can_merge') |
| |
| def commits_task_status(self): |
| return self._task_status('allura.tasks.repo_tasks.determine_mr_commits') |
| |
| def add_meta_post(self, changes): |
| tmpl = g.jinja2_env.get_template('allura:templates/repo/merge_request_changed.html') |
| message = tmpl.render(changes=changes) |
| self.discussion_thread.add_post(text=message, is_meta=True, ignore_security=True) |
| |
| |
| class RepoObject: |
| |
| def __repr__(self): # pragma no cover |
| return '<{} {}>'.format( |
| self.__class__.__name__, self._id) |
| |
| def primary(self): |
| return self |
| |
| def index_id(self): |
| '''Globally unique artifact identifier. Used for |
| SOLR ID, shortlinks, and maybe elsewhere |
| ''' |
| id = '{}.{}#{}'.format( |
| 'allura.model.repo', # preserve index_id after module consolidation |
| self.__class__.__name__, |
| self._id) |
| return id.replace('.', '/') |
| |
| @classmethod |
| def upsert(cls, id, **kwargs): |
| isnew = False |
| r = cls.query.get(_id=id) |
| if r is not None: |
| return r, isnew |
| try: |
| r = cls(_id=id, **kwargs) |
| session(r).flush(r) |
| isnew = True |
| except pymongo.errors.DuplicateKeyError: # pragma no cover |
| session(r).expunge(r) |
| r = cls.query.get(_id=id) |
| return r, isnew |
| |
| |
| # this is duplicative with the Commit model |
| # would be nice to get rid of this "doc" based view, but it is used a lot |
| CommitDoc = collection( |
| 'repo_ci', main_doc_session, |
| Field('_id', str), |
| Field('tree_id', str), |
| Field('committed', SUser), |
| Field('authored', SUser), |
| Field('message', str), |
| Field('parent_ids', [str], index=True), |
| Field('child_ids', [str], index=True), |
| Field('repo_ids', [S.ObjectId()], index=True)) |
| |
| |
| class CommitStatus(MappedClass): |
| class __mongometa__: |
| session = repository_orm_session |
| name = 'commit_status' |
| indexes = [('commit_id', 'context'),] |
| |
| query: 'Query[CommitStatus]' |
| |
| _id = FieldProperty(S.ObjectId) |
| state = FieldProperty(str) |
| target_url = FieldProperty(str) |
| description = FieldProperty(str) |
| context = FieldProperty(str) |
| commit_id = FieldProperty(str) |
| commit = RelationProperty('Commit') |
| |
| def __init__(self, **kw): |
| assert 'commit_id' in kw, 'Commit id missing' |
| super().__init__(**kw) |
| |
| @classmethod |
| def upsert(cls, **kw): |
| obj = cls.query.find_and_modify( |
| query=dict(commit_id=kw.get('commit_id'), context=kw.get('context')), |
| update={'$set': kw}, |
| new=True, |
| upsert=True, |
| ) |
| return obj |
| |
| |
| class Commit(MappedClass, RepoObject, ActivityObject): |
| # Basic commit information |
| # One of these for each commit in the physical repo on disk |
| |
| class __mongometa__: |
| session = repository_orm_session |
| name = 'repo_ci' |
| indexes = [ |
| 'parent_ids', |
| 'child_ids', |
| 'repo_ids', |
| ] |
| |
| query: 'Query[Commit]' |
| |
| _id = FieldProperty(str) # hexsha of the commit (for Git and Hg) |
| tree_id = FieldProperty(str) |
| committed = FieldProperty(SUser) |
| authored = FieldProperty(SUser) |
| message = FieldProperty(str) |
| parent_ids = FieldProperty([str]) |
| child_ids = FieldProperty([str]) |
| repo_ids = FieldProperty([S.ObjectId()]) |
| statuses = RelationProperty('CommitStatus', via="commit_id") |
| |
| type_s = 'Commit' |
| # Ephemeral attrs |
| repo = None |
| |
| def __init__(self, **kw): |
| for k, v in kw.items(): |
| setattr(self, k, v) |
| |
| @property |
| def activity_name(self): |
| return self.shorthand_id() |
| |
| @property |
| def activity_extras(self): |
| d = ActivityObject.activity_extras.fget(self) |
| d.update(summary=self._summary(limit=500)) |
| if self.repo: |
| d.update(app_config_id=self.repo.app.config._id) |
| return d |
| |
| def has_activity_access(self, perm, user, activity): |
| """ |
| Check access against the original app. |
| |
| Commits have no ACLs and are therefore always viewable by any user, if |
| they have access to the tool. |
| """ |
| app_config_id = activity.obj.activity_extras.get('app_config_id') |
| if app_config_id: |
| app_config = AppConfig.query.get(_id=app_config_id) |
| return has_access(app_config, perm, user) |
| return True |
| |
| def set_context(self, repo): |
| self.repo = repo |
| |
| @LazyProperty |
| def authored_user(self): |
| return User.by_email_address(self.authored.email) |
| |
| @LazyProperty |
| def committed_user(self): |
| return User.by_email_address(self.committed.email) |
| |
| @LazyProperty |
| def author_url(self): |
| u = self.authored_user |
| if u: |
| return u.url() |
| |
| @LazyProperty |
| def committer_url(self): |
| u = self.committed_user |
| if u: |
| return u.url() |
| |
| @LazyProperty |
| def tree(self): |
| return self.get_tree(create=True) |
| |
| def get_tree(self, create=True): |
| if self.tree_id is None and create: |
| self.tree_id = self.repo.compute_tree_new(self) |
| if self.tree_id is None: |
| return None |
| cache = getattr(c, 'model_cache', '') or ModelCache() |
| t = cache.get(Tree, dict(_id=self.tree_id)) |
| if t is None and create: |
| self.tree_id = self.repo.compute_tree_new(self) |
| t = Tree.query.get(_id=self.tree_id) |
| cache.set(Tree, dict(_id=self.tree_id), t) |
| if t is not None: |
| t.set_context(self) |
| return t |
| |
| @LazyProperty |
| def summary(self): |
| return self._summary() |
| |
| def _summary(self, limit=50): |
| message = h.really_unicode(self.message) |
| first_line = message.split('\n')[0] |
| return h.text.truncate(first_line, limit) |
| |
| def shorthand_id(self): |
| if self.repo is None: |
| self.repo = self.guess_repo() |
| if self.repo is None: |
| return repr(self) |
| return self.repo.shorthand_for_commit(self._id) |
| |
| @LazyProperty |
| def symbolic_ids(self): |
| return self.repo.symbolics_for_commit(self) |
| |
| def url(self): |
| if self.repo is None: |
| self.repo = self.guess_repo() |
| if self.repo is None: |
| return '#' |
| return self.repo.url_for_commit(self) |
| |
| def guess_repo(self): |
| import traceback |
| log.error('guess_repo should not be called: %s' % |
| ''.join(traceback.format_stack())) |
| for ac in c.project.app_configs: |
| try: |
| app = c.project.app_instance(ac) |
| if app.repo._id in self.repo_ids: |
| return app.repo |
| except AttributeError: |
| pass |
| return None |
| |
| def link_text(self): |
| '''The link text that will be used when a shortlink to this artifact |
| is expanded into an <a></a> tag. |
| |
| By default this method returns type_s + shorthand_id(). Subclasses should |
| override this method to provide more descriptive link text. |
| ''' |
| return self.shorthand_id() |
| |
| def context(self): |
| result = dict(prev=None, next=None) |
| if self.parent_ids: |
| result['prev'] = self.query.find( |
| dict(_id={'$in': self.parent_ids})).all() |
| for ci in result['prev']: |
| ci.set_context(self.repo) |
| if self.child_ids: |
| result['next'] = self.query.find( |
| dict(_id={'$in': self.child_ids})).all() |
| for ci in result['next']: |
| ci.set_context(self.repo) |
| return result |
| |
| @LazyProperty |
| def diffs(self): |
| return self.paged_diffs() |
| |
| def paged_diffs(self, start=0, end=None, onlyChangedFiles=False): |
| diffs = self.repo.paged_diffs(self._id, start, end, onlyChangedFiles) |
| |
| return Object( |
| added=sorted(diffs['added']), |
| removed=sorted(diffs['removed']), |
| changed=sorted(diffs['changed']), |
| copied=sorted(diffs['copied'], key=itemgetter('new', 'old')), # this is a list of dicts |
| renamed=sorted(diffs['renamed'], key=itemgetter('new', 'old')), # this is a list of dicts |
| total=diffs['total']) |
| |
| def get_path(self, path, create=True): |
| path = path.lstrip('/') |
| parts = path.split('/') |
| cur = self.get_tree(create) |
| if cur is not None: |
| for part in parts: |
| if part != '': |
| cur = cur[part] |
| return cur |
| |
| @LazyProperty |
| def changed_paths(self): |
| ''' |
| Returns a list of paths changed in this commit. |
| Leading and trailing slashes are removed, and |
| the list is complete, meaning that if a sub-path |
| is changed, all of the parent paths are included |
| (including '' to represent the root path). |
| |
| Example: |
| |
| If the file /foo/bar is changed in the commit, |
| this would return ['', 'foo', 'foo/bar'] |
| ''' |
| changes = self.repo.get_changes(self._id) |
| changed_paths = set() |
| for change in changes: |
| node = h.really_unicode(change).strip('/') |
| changed_paths.add(node) |
| node_path = os.path.dirname(node) |
| while node_path: |
| changed_paths.add(node_path) |
| node_path = os.path.dirname(node_path) |
| changed_paths.add('') # include '/' if there are any changes |
| return changed_paths |
| |
| @LazyProperty |
| def added_paths(self): |
| ''' |
| Returns a list of paths added in this commit. |
| Leading and trailing slashes are removed, and |
| the list is complete, meaning that if a directory |
| with subdirectories is added, all of the child |
| paths are included (this relies on the :meth paged_diffs: |
| being complete). |
| |
| Example: |
| |
| If the directory /foo/bar/ is added in the commit |
| which contains a subdirectory /foo/bar/baz/ with |
| the file /foo/bar/baz/qux.txt, this would return: |
| ['foo/bar', 'foo/bar/baz', 'foo/bar/baz/qux.txt'] |
| ''' |
| paths = set() |
| for path in self.paged_diffs()['added']: |
| paths.add(path.strip('/')) |
| return paths |
| |
| @LazyProperty |
| def info(self): |
| return dict( |
| id=self._id, |
| author=self.authored.name, |
| author_email=self.authored.email, |
| date=self.authored.date, |
| author_url=self.author_url, |
| shortlink=self.shorthand_id(), |
| summary=self.summary |
| ) |
| |
| @LazyProperty |
| def webhook_info(self): |
| return { |
| 'id': self._id, |
| 'url': h.absurl(self.url()), |
| 'timestamp': self.authored.date, |
| 'message': self.summary, |
| 'author': { |
| 'name': self.authored.name, |
| 'email': self.authored.email, |
| 'username': self.authored_user.username if self.authored_user else '', |
| }, |
| 'committer': { |
| 'name': self.committed.name, |
| 'email': self.committed.email, |
| 'username': self.committed_user.username if self.committed_user else '', |
| }, |
| 'added': self.diffs.added, |
| 'removed': self.diffs.removed, |
| 'modified': self.diffs.changed, |
| 'copied': self.diffs.copied, |
| 'renamed': self.diffs.renamed, |
| } |
| |
| |
| class Tree(MappedClass, RepoObject): |
| # Basic tree information |
| class __mongometa__: |
| session = repository_orm_session |
| name = 'repo_tree' |
| indexes = [ |
| ] |
| |
| query: 'Query[Tree]' |
| |
| _id = FieldProperty(str) |
| tree_ids = FieldProperty([dict(name=str, id=str)]) |
| blob_ids = FieldProperty([dict(name=str, id=str)]) |
| other_ids = FieldProperty([dict(name=str, id=str, type=SObjType)]) |
| |
| # Ephemeral attrs |
| repo = None |
| commit = None |
| parent = None |
| name = None |
| |
| def __getitem__(self, name: str) -> Tree | Blob: |
| cache = getattr(c, 'model_cache', '') or ModelCache() |
| obj = self.by_name[name] |
| if obj['type'] == 'blob': |
| return Blob(self, name, obj['id']) |
| if obj['type'] == 'submodule': |
| log.info('Skipping submodule "%s"' % name) |
| raise KeyError(name) |
| if obj['type'] == 'symlink': |
| return Symlink(self, name, obj['id']) |
| obj = cache.get(Tree, dict(_id=obj['id'])) |
| if obj is None: |
| oid = self.repo.compute_tree_new( |
| self.commit, self.path() + name + '/') |
| obj = cache.get(Tree, dict(_id=oid)) |
| if obj is None: |
| raise KeyError(name) |
| obj.set_context(self, name) |
| return obj |
| |
| def get_obj_by_path(self, path): |
| if hasattr(path, 'get'): |
| path = path['new'] |
| if path.startswith('/'): |
| path = path[1:] |
| path = path.split('/') |
| obj = self |
| for p in path: |
| if isinstance(obj, Blob): # normally is Tree |
| # can get a Blob (incl git symlink in past) if it changed type in a commit |
| return None |
| try: |
| obj = obj[p] |
| except KeyError: |
| return None |
| return obj |
| |
| def get_blob_by_path(self, path): |
| obj = self.get_obj_by_path(path) |
| return obj if isinstance(obj, Blob) else None |
| |
| def set_context(self, commit_or_tree, name=None): |
| assert commit_or_tree is not self |
| self.repo = commit_or_tree.repo |
| if name: |
| self.commit = commit_or_tree.commit |
| self.parent = commit_or_tree |
| self.name = name |
| else: |
| self.commit = commit_or_tree |
| |
| def readme(self): |
| 'returns (filename, unicode text) if a readme file is found' |
| for x in self.blob_ids: |
| if README_RE.match(x.name): |
| name = x.name |
| blob = self[name] |
| return (x.name, h.really_unicode(blob.text)) |
| return None, None |
| |
| def ls(self): |
| ''' |
| List the entries in this tree, with historical commit info for |
| each node. |
| ''' |
| last_commit = LastCommit.get(self) |
| # ensure that the LCD is saved, even if |
| # there is an error later in the request |
| if last_commit: |
| session(last_commit).flush(last_commit) |
| return self._lcd_map(last_commit) |
| else: |
| return [] |
| |
| def _lcd_map(self, lcd): |
| ''' |
| Map "last-commit docs" to the structure that templates expect. |
| |
| (This exists because LCD logic changed in the past, whereas templates |
| were not changed) |
| ''' |
| if lcd is None: |
| return [] |
| commit_ids = [e.commit_id for e in lcd.entries] |
| commits = list(Commit.query.find(dict(_id={'$in': commit_ids}))) |
| for commit in commits: |
| commit.set_context(self.repo) |
| commit_infos = {c._id: c.info for c in commits} |
| tree_names = sorted(n.name for n in self.tree_ids) |
| blob_names = sorted(n.name for n in self.blob_ids) |
| other_names = sorted(n.name for n in self.other_ids) |
| |
| results = [] |
| for type, names in (('DIR', tree_names), ('BLOB', blob_names), ('OTHER', other_names)): |
| for name in names: |
| commit_info = commit_infos.get(lcd.by_name.get(name)) |
| if not commit_info: |
| commit_info = defaultdict(str) |
| elif 'id' in commit_info: |
| commit_info['href'] = self.repo.url_for_commit( |
| commit_info['id']) |
| results.append(dict( |
| kind=type, |
| name=name, |
| href=name, |
| last_commit=dict( |
| author=commit_info['author'], |
| author_email=commit_info['author_email'], |
| author_url=commit_info['author_url'], |
| date=commit_info.get('date'), |
| href=commit_info.get('href', ''), |
| shortlink=commit_info['shortlink'], |
| summary=commit_info['summary'], |
| ), |
| )) |
| return results |
| |
| def path(self): |
| if self.parent: |
| assert self.parent is not self |
| return self.parent.path() + self.name + '/' |
| else: |
| return '/' |
| |
| def url(self): |
| return self.commit.url() + 'tree' + self.path() |
| |
| @LazyProperty |
| def by_name(self): |
| d = Object((x.name, x) for x in self.other_ids) |
| d.update( |
| (x.name, Object(x, type='tree')) |
| for x in self.tree_ids) |
| d.update( |
| (x.name, Object(x, type='blob')) |
| for x in self.blob_ids) |
| return d |
| |
| def is_blob(self, name): |
| return self.by_name[name]['type'] == 'blob' |
| |
| def get_blob(self, name): |
| x = self.by_name[name] |
| return Blob(self, name, x.id) |
| |
| |
| class Blob: |
| '''Lightweight object representing a file in the repo''' |
| |
| def __init__(self, tree, name, _id): |
| self._id = _id |
| self.tree = tree |
| self.name = name |
| self.repo = tree.repo |
| self.commit = tree.commit |
| fn, ext = os.path.splitext(self.name) |
| self.extension = ext or fn |
| |
| def path(self): |
| return self.tree.path() + h.really_unicode(self.name) |
| |
| def url(self): |
| return self.tree.url() + h.really_unicode(self.name) |
| |
| @LazyProperty |
| def _content_type_encoding(self): |
| return self.repo.guess_type(self.name) |
| |
| @LazyProperty |
| def content_type(self): |
| return self._content_type_encoding[0] |
| |
| @LazyProperty |
| def content_encoding(self): |
| return self._content_type_encoding[1] |
| |
| @property |
| def has_pypeline_view(self): |
| if README_RE.match(self.name) or self.extension in PYPELINE_EXTENSIONS: |
| return True |
| return False |
| |
| @LazyProperty |
| def has_html_view(self): |
| ''' |
| Return true if file is a text file that can be displayed. |
| :return: boolean |
| ''' |
| if self.extension in self.repo._additional_viewable_extensions: |
| return True |
| if self.extension in BINARY_EXTENSIONS: |
| return False |
| if (self.content_type.startswith('text/') or |
| self.extension in VIEWABLE_EXTENSIONS or |
| self.extension in PYPELINE_EXTENSIONS or |
| utils.is_text_file(self.text)): |
| return True |
| return False |
| |
| @property |
| def has_image_view(self): |
| return self.content_type.startswith('image/') |
| |
| def open(self): |
| return self.repo.open_blob(self) |
| |
| def __iter__(self): |
| return iter(self.open()) |
| |
| @LazyProperty |
| def size(self): |
| return self.repo.blob_size(self) |
| |
| @LazyProperty |
| def text(self): |
| """ |
| Direct binary contents of file. Convert with h.really_unicode() if you think it is textual. |
| """ |
| return self.open().read() |
| |
| |
| class EmptyBlob(Blob): |
| def __init__(self): |
| pass |
| |
| def open(self): |
| return '' |
| |
| @property |
| def size(self): |
| return 0 |
| |
| def __bool__(self): |
| return False |
| |
| |
| class Symlink(Blob): |
| # maybe this shouldn't inherit from Blob, but its handy for now and works well with git (where symlinks are blobs) |
| pass |
| |
| |
| # this is duplicative with the LastCommit model |
| # would be nice to get rid of this "doc" based view, but it is used a lot |
| LastCommitDoc = collection( |
| 'repo_last_commit', main_doc_session, |
| Field('_id', S.ObjectId()), |
| Field('commit_id', str), |
| Field('path', str), |
| Index('commit_id', 'path'), |
| Field('entries', [dict( |
| name=str, |
| commit_id=str)])) |
| |
| |
| class LastCommit(MappedClass, RepoObject): |
| # Information about the last commit to touch a tree |
| class __mongometa__: |
| session = repository_orm_session |
| name = 'repo_last_commit' |
| indexes = [ |
| ('commit_id', 'path'), |
| ] |
| |
| query: 'Query[LastCommit]' |
| |
| _id = FieldProperty(S.ObjectId) |
| commit_id = FieldProperty(str) |
| path = FieldProperty(str) |
| entries = FieldProperty([dict( |
| name=str, |
| commit_id=str, |
| )]) |
| |
| def __repr__(self): |
| return f'<LastCommit /{self.path!r} {self.commit_id}>' |
| |
| @classmethod |
| def _last_commit_id(cls, commit, path): |
| if path == '': |
| # on the top-level dir, the relevant commit should always be the current commit |
| return commit._id |
| try: |
| rev = next(commit.repo.log(commit._id, path, id_only=True, limit=1)) |
| return commit.repo.rev_to_commit_id(rev) |
| except StopIteration: |
| log.error('Tree node not recognized by SCM: %s @ %s', |
| path, commit._id) |
| return commit._id |
| |
| @classmethod |
| def _prev_commit_id(cls, commit, path): |
| if not commit.parent_ids or path in commit.added_paths: |
| return None # new paths by definition have no previous LCD |
| lcid_cache = getattr(c, 'lcid_cache', '') |
| if lcid_cache != '' and path in lcid_cache: |
| return lcid_cache[path] |
| try: |
| log_iter = commit.repo.log(commit._id, path, id_only=True, limit=2) |
| next(log_iter) |
| rev = next(log_iter) |
| return commit.repo.rev_to_commit_id(rev) |
| except StopIteration: |
| return None |
| |
| @classmethod |
| def get(cls, tree): |
| '''Find or build the LastCommitDoc for the given tree.''' |
| cache = getattr(c, 'model_cache', '') or ModelCache() |
| path = tree.path().strip('/') |
| last_commit_id = cls._last_commit_id(tree.commit, path) |
| lcd = cache.get(cls, {'path': path, 'commit_id': last_commit_id}) |
| if lcd is None: |
| commit = cache.get(Commit, {'_id': last_commit_id}) |
| commit.set_context(tree.repo) |
| lcd = cls._build(commit.get_path(path)) |
| return lcd |
| |
| @classmethod |
| def _build(cls, tree): |
| ''' |
| Build the LCD record, presuming that this tree is where it was most |
| recently changed. |
| ''' |
| model_cache = getattr(c, 'model_cache', '') or ModelCache() |
| path = tree.path().strip('/') |
| entries = [] |
| prev_lcd = None |
| prev_lcd_cid = cls._prev_commit_id(tree.commit, path) |
| if prev_lcd_cid: |
| prev_lcd = model_cache.get( |
| cls, {'path': path, 'commit_id': prev_lcd_cid}) |
| entries = {} |
| nodes = { |
| node.name for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)} |
| changed = { |
| node for node in nodes if os.path.join(path, node) in tree.commit.changed_paths} |
| unchanged = [os.path.join(path, node) for node in nodes - changed] |
| if prev_lcd: |
| # get unchanged entries from previously computed LCD |
| entries = prev_lcd.by_name |
| elif unchanged: |
| # no previously computed LCD, so get unchanged entries from SCM |
| # (but only ask for the ones that we know we need) |
| entries = tree.commit.repo.last_commit_ids(tree.commit, unchanged) |
| if entries is None: |
| # something strange went wrong; still show the list of files |
| # and possibly try again later |
| entries = {} |
| # paths are fully-qualified; shorten them back to just node names |
| entries = { |
| os.path.basename(path): commit_id for path, commit_id in entries.items()} |
| # update with the nodes changed in this tree's commit |
| entries.update({node: tree.commit._id for node in changed}) |
| # convert to a list of dicts, since mongo doesn't handle arbitrary keys |
| # well (i.e., . and $ not allowed) |
| entries = [{'name': name, 'commit_id': value} |
| for name, value in entries.items()] |
| lcd = cls( |
| commit_id=tree.commit._id, |
| path=path, |
| entries=entries, |
| ) |
| model_cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd) |
| return lcd |
| |
| @LazyProperty |
| def by_name(self): |
| return {n.name: n.commit_id for n in self.entries} |
| |
| |
| class ModelCache: |
| |
| ''' |
| Cache model instances based on query params passed to get. LRU cache. |
| |
| This does more caching than ming sessions (which only cache individual objects by _id) |
| |
| The added complexity here may be unnecessary premature optimization, but |
| should be quite helpful when building up many models in order, like lcd _build |
| for a series of several new commits. |
| ''' |
| |
| def __init__(self, max_instances=None, max_queries=None): |
| ''' |
| By default, each model type can have 2000 instances and |
| 8000 queries. You can override these for specific model |
| types by passing in a dict() for either max_instances or |
| max_queries keyed by the class(es) with the max values. |
| Classes not in the dict() will use the default 2000/8000 |
| default. |
| |
| If you pass in a number instead of a dict, that value will |
| be used as the max for all classes. |
| ''' |
| max_instances_default = 2000 |
| max_queries_default = 8000 |
| if isinstance(max_instances, int): |
| max_instances_default = max_instances |
| if isinstance(max_queries, int): |
| max_queries_default = max_queries |
| self._max_instances = defaultdict(lambda: max_instances_default) |
| self._max_queries = defaultdict(lambda: max_queries_default) |
| if hasattr(max_instances, 'items'): |
| self._max_instances.update(max_instances) |
| if hasattr(max_queries, 'items'): |
| self._max_queries.update(max_queries) |
| |
| # keyed by query, holds _id |
| self._query_cache = defaultdict(OrderedDict) |
| self._instance_cache = defaultdict(OrderedDict) # keyed by _id |
| |
| def _normalize_query(self, query): |
| _query = query |
| if not isinstance(_query, tuple): |
| _query = tuple(sorted(list(_query.items()), key=lambda k: k[0])) |
| return _query |
| |
| def _model_query(self, cls): |
| if hasattr(cls, 'query'): |
| return cls.query |
| elif hasattr(cls, 'm'): |
| return cls.m |
| else: |
| raise AttributeError( |
| '%s has neither "query" nor "m" attribute' % cls) |
| |
| def get(self, cls, query): |
| _query = self._normalize_query(query) |
| self._touch(cls, _query) |
| if _query not in self._query_cache[cls]: |
| val = self._model_query(cls).get(**query) |
| self.set(cls, _query, val) |
| return val |
| _id = self._query_cache[cls][_query] |
| if _id is None: |
| return None |
| if _id not in self._instance_cache[cls]: |
| val = self._model_query(cls).get(**query) |
| self.set(cls, _query, val) |
| return val |
| return self._instance_cache[cls][_id] |
| |
| def set(self, cls, query, val): |
| _query = self._normalize_query(query) |
| if val is not None: |
| _id = getattr(val, '_model_cache_id', |
| getattr(val, '_id', |
| self._query_cache[cls].get(_query, |
| None))) |
| if _id is None: |
| _id = val._model_cache_id = bson.ObjectId() |
| self._query_cache[cls][_query] = _id |
| self._instance_cache[cls][_id] = val |
| else: |
| self._query_cache[cls][_query] = None |
| self._touch(cls, _query) |
| self._check_sizes(cls) |
| |
| def _touch(self, cls, query): |
| ''' |
| Keep track of insertion order, prevent duplicates, |
| and expire from the cache in a FIFO manner. |
| ''' |
| _query = self._normalize_query(query) |
| if _query not in self._query_cache[cls]: |
| return |
| _id = self._query_cache[cls].pop(_query) |
| self._query_cache[cls][_query] = _id |
| |
| if _id not in self._instance_cache[cls]: |
| return |
| val = self._instance_cache[cls].pop(_id) |
| self._instance_cache[cls][_id] = val |
| |
| def _check_sizes(self, cls): |
| if self.num_queries(cls) > self._max_queries[cls]: |
| _id = self._remove_least_recently_used(self._query_cache[cls]) |
| if _id in self._instance_cache[cls]: |
| instance = self._instance_cache[cls][_id] |
| self._try_flush(instance, expunge=False) |
| if self.num_instances(cls) > self._max_instances[cls]: |
| instance = self._remove_least_recently_used( |
| self._instance_cache[cls]) |
| self._try_flush(instance, expunge=True) |
| |
| def _try_flush(self, instance, expunge=False): |
| try: |
| inst_session = session(instance) |
| except AttributeError: |
| inst_session = None |
| if inst_session: |
| inst_session.flush(instance) |
| if expunge: |
| inst_session.expunge(instance) |
| |
| def _remove_least_recently_used(self, cache): |
| # last-used (most-recently-used) is last in cache, so take first |
| key, val = cache.popitem(last=False) |
| return val |
| |
| def num_queries(self, cls=None): |
| if cls is None: |
| return sum(len(c) for c in self._query_cache.values()) |
| else: |
| return len(self._query_cache[cls]) |
| |
| def num_instances(self, cls=None): |
| if cls is None: |
| return sum(len(c) for c in self._instance_cache.values()) |
| else: |
| return len(self._instance_cache[cls]) |
| |
| def instance_ids(self, cls): |
| return list(self._instance_cache[cls].keys()) |
| |
| def batch_load(self, cls, query, attrs=None): |
| ''' |
| Load multiple results given a query. |
| |
| Optionally takes a list of attribute names to use |
| as the cache key. If not given, uses the keys of |
| the given query. |
| ''' |
| if attrs is None: |
| attrs = list(query.keys()) |
| for result in self._model_query(cls).find(query): |
| keys = {a: getattr(result, a) for a in attrs} |
| self.set(cls, keys, result) |
| |
| |
| class GitLikeTree: |
| |
| ''' |
| A tree node similar to that which is used in git |
| |
| :var dict blobs: files at this level of the tree. name => oid |
| :var dict trees: subtrees (child dirs). name => GitLikeTree |
| ''' |
| |
| def __init__(self): |
| self.blobs = {} # blobs[name] = oid |
| self.trees = defaultdict(GitLikeTree) # trees[name] = GitLikeTree() |
| self._hex = None |
| |
| def get_tree(self, path): |
| path = h.really_unicode(path) |
| if path.startswith('/'): |
| path = path[1:] |
| if not path: |
| return self |
| cur = self |
| for part in path.split('/'): |
| cur = cur.trees[part] |
| return cur |
| |
| def get_blob(self, path): |
| path = h.really_unicode(path) |
| if path.startswith('/'): |
| path = path[1:] |
| path_parts = path.split('/') |
| dirpath, last = path_parts[:-1], path_parts[-1] |
| cur = self |
| for part in dirpath: |
| cur = cur.trees[part] |
| return cur.blobs[last] |
| |
| def set_blob(self, path, oid): |
| path = h.really_unicode(path) |
| if path.startswith('/'): |
| path = path[1:] |
| path_parts = path.split('/') |
| dirpath, filename = path_parts[:-1], path_parts[-1] |
| cur = self |
| for part in dirpath: |
| cur = cur.trees[part] |
| cur.blobs[filename] = oid |
| |
| def hex(self): |
| '''Compute a recursive sha1 hash on the tree''' |
| # dependent on __repr__ below |
| if self._hex is None: |
| sha_obj = sha1(b'tree\n' + six.ensure_binary(repr(self))) |
| self._hex = sha_obj.hexdigest() |
| return self._hex |
| |
| def __repr__(self): |
| # this can't change, is used in hex() above |
| lines = [f't {t.hex()} {h.really_unicode(name)}' |
| for name, t in self.trees.items()] |
| lines += [f'b {oid} {h.really_unicode(name)}' |
| for name, oid in self.blobs.items()] |
| return six.ensure_str('\n'.join(sorted(lines))) |
| |
| def __str__(self): |
| return self.pretty_tree(recurse=False) |
| |
| def pretty_tree(self, indent=0, recurse=True, show_id=True): |
| '''For debugging, show a nice tree representation''' |
| lines = [' ' * indent + 't %s %s' % |
| (name, '\n' + t.unicode_full_tree(indent + 2, show_id=show_id) |
| if recurse else t.hex()) |
| for name, t in sorted(self.trees.items())] |
| lines += [' ' * indent + 'b {} {}'.format(name, oid if show_id else '') |
| for name, oid in sorted(self.blobs.items())] |
| output = h.really_unicode('\n'.join(lines)) |
| return output |
| |
| |
| def topological_sort(graph): |
| '''Return the topological sort of a graph. |
| |
| The graph is a dict with each entry representing |
| a node (the key is the node ID) and its parent(s) (a |
| set of node IDs). Result is an iterator over the topo-sorted |
| node IDs. |
| |
| The algorithm is based on one seen in |
| http://en.wikipedia.org/wiki/Topological_sorting#CITEREFKahn1962 |
| ''' |
| # Index children, identify roots |
| children = defaultdict(list) |
| roots = [] |
| for nid, parents in list(graph.items()): |
| if not parents: |
| graph.pop(nid) |
| roots.append(nid) |
| for p_nid in parents: |
| children[p_nid].append(nid) |
| # Topo sort |
| while roots: |
| n = roots.pop() |
| yield n |
| for child in children[n]: |
| graph[child].remove(n) |
| if not graph[child]: |
| graph.pop(child) |
| roots.append(child) |
| assert not graph, 'Cycle detected' |
| |
| |
| def prefix_paths_union(a, b): |
| """ |
| Given two sets of paths, a and b, find the items from a that |
| are either in b or are parent directories of items in b. |
| """ |
| union = a & b |
| prefixes = a - b |
| candidates = b - a |
| for prefix in prefixes: |
| for candidate in candidates: |
| if candidate.startswith(prefix + '/'): |
| union.add(prefix) |
| break |
| return union |
| |
| |
| def zipdir(source, zipfile, exclude=None): |
| """Create zip archive using zip binary.""" |
| zipbin = tg.config.get('scm.repos.tarball.zip_binary', '/usr/bin/zip') |
| source = source.rstrip('/') |
| # this is needed to get proper prefixes inside zip-file |
| working_dir = os.path.dirname(source) |
| source_fn = os.path.basename(source) |
| command = [zipbin, '-y', '-q', '-r', zipfile, source_fn.encode('utf-8')] |
| if exclude: |
| command += ['-x', exclude] |
| p = Popen(command, cwd=working_dir, stdout=PIPE, stderr=PIPE) |
| stdout, stderr = p.communicate() |
| if p.returncode != 0: |
| raise Exception( |
| "Command: {} returned non-zero exit code {}\n" |
| "STDOUT: {}\n" |
| "STDERR: {}".format(command, p.returncode, stdout, stderr)) |
| |
| |
| Mapper.compile_all() |