| import re |
| import sys |
| import logging |
| from hashlib import sha1 |
| from itertools import izip, chain |
| from datetime import datetime |
| from collections import defaultdict |
| |
| from pylons import g, c |
| import pymongo.errors |
| |
| from ming import Field, Index, collection |
| from ming import schema as S |
| from ming.base import Object |
| from ming.utils import LazyProperty |
| from ming.orm import mapper, session |
| |
| from allura.lib import utils |
| from allura.lib import helpers as h |
| |
| from .auth import User |
| from .session import main_doc_session, project_doc_session |
| from .session import repository_orm_session |
| |
| log = logging.getLogger(__name__) |
| |
# Reusable schema fragments shared by the collections declared below.
# SUser: name/email/date sub-document used for both the 'committed' and
# 'authored' fields of a commit.
SUser = {'name': str, 'email': str, 'date': datetime}
# SObjType: the allowed values for the 'type' of a non-tree/non-blob entry.
SObjType = S.OneOf('blob', 'tree', 'submodule')
| |
# Number of ids sent per query when batching lookups with $in
QSIZE = 100
# Matches file names of the form README or README.<ext> (case-insensitive,
# at most one extension).  Written as a raw string so that the '\.' escape
# reaches the regex engine verbatim instead of relying on Python passing
# unknown string escapes through.
README_RE = re.compile(r'^README(\.[^.]*)?$', re.IGNORECASE)
| |
# Commit documents ('repo_ci' collection, main doc session).
# Besides the tree id, committer/author sub-documents and the message, each
# commit records its parent ids, child ids and the ids of the repositories
# it belongs to; those three list fields are indexed.
CommitDoc = collection(
    'repo_ci',
    main_doc_session,
    Field('_id', str),
    Field('tree_id', str),
    Field('committed', SUser),
    Field('authored', SUser),
    Field('message', str),
    Field('parent_ids', [str], index=True),
    Field('child_ids', [str], index=True),
    Field('repo_ids', [S.ObjectId()], index=True),
)
| |
# Tree documents ('repo_tree' collection, main doc session).
# A tree lists its entries in three buckets: sub-trees, blobs, and "other"
# objects, the latter carrying an explicit type (see SObjType).
TreeDoc = collection(
    'repo_tree',
    main_doc_session,
    Field('_id', str),
    Field('tree_ids', [{'name': str, 'id': str}]),
    Field('blob_ids', [{'name': str, 'id': str}]),
    Field('other_ids', [{'name': str, 'id': str, 'type': SObjType}]),
)
| |
# Last-commit documents ('repo_last_commit' collection, project doc session):
# for a (repo_id, object_id) pair, a summary of the last commit to touch
# that tree/blob.  The pair is covered by a compound index.
LastCommitDoc = collection(
    'repo_last_commit',
    project_doc_session,
    Field('_id', str),
    Field('repo_id', S.ObjectId()),
    Field('object_id', str),
    Field('commit_info', {
        'id': str,
        'date': datetime,
        'author': str,
        'author_email': str,
        'author_url': str,
        'href': str,
        'shortlink': str,
        'summary': str,
    }),
    Index('repo_id', 'object_id'),
)
| |
# 'repo_trees' collection (main doc session): the ids of all trees contained
# within a commit, stored as a flat list of strings.
TreesDoc = collection(
    'repo_trees',
    main_doc_session,
    Field('_id', str),
    Field('tree_ids', [str]),
)
| |
# 'repo_diffinfo' collection (main doc session): which things were
# added/removed in a commit.  Each difference names an entry and carries a
# left-hand-side and a right-hand-side object id.
DiffInfoDoc = collection(
    'repo_diffinfo',
    main_doc_session,
    Field('_id', str),
    Field('differences', [{'name': str, 'lhs_id': str, 'rhs_id': str}]),
)
| |
# 'repo_commitrun' collection (main doc session).  A run is a linear series
# of single-parent commits.  commit_ids (indexed) and commit_times are
# parallel lists; parent_commit_ids holds the parents of the run's final
# commit, which is how commitlog() links one run to the next.
CommitRunDoc = collection(
    'repo_commitrun',
    main_doc_session,
    Field('_id', str),
    Field('parent_commit_ids', [str]),
    Field('commit_ids', [str], index=True),
    Field('commit_times', [datetime]),
)
| |
class RepoObject(object):
    '''Behaviour shared by the repository model classes in this module.

    Subclasses are expected to expose an ``_id`` attribute and a ``query``
    manager (both are used below but supplied elsewhere).
    '''

    def __repr__(self): # pragma no cover
        cls_name = self.__class__.__name__
        return '<%s %s>' % (cls_name, self._id)

    def primary(self):
        '''Return the object itself.'''
        return self

    def index_id(self):
        '''Globally unique artifact identifier.  Used for
        SOLR ID, shortlinks, and maybe elsewhere

        Built from the class's module, the class name and ``_id``; every
        '.' in the result (including any inside ``_id``) becomes '/'.
        '''
        klass = self.__class__
        dotted = '%s.%s#%s' % (klass.__module__, klass.__name__, self._id)
        return dotted.replace('.', '/')

    @LazyProperty
    def legacy(self):
        '''A minimal ``Object`` exposing only ``object_id`` (== ``_id``).'''
        return Object(object_id=self._id)

    @property
    def object_id(self):
        '''Alias for ``_id``.'''
        return self._id

    @classmethod
    def upsert(cls, id):
        '''Fetch the object with the given ``_id``, creating it if absent.

        Returns a ``(obj, isnew)`` pair; ``isnew`` is True only when this
        call created and flushed the object.
        '''
        obj = cls.query.get(_id=id)
        if obj is not None:
            return obj, False
        try:
            obj = cls(_id=id)
            session(obj).flush(obj)
        except pymongo.errors.DuplicateKeyError: # pragma no cover
            # The insert hit a duplicate key: drop our unsaved copy and
            # hand back whatever is stored under that id.
            session(obj).expunge(obj)
            return cls.query.get(_id=id), False
        return obj, True
| |
class Commit(RepoObject):
    '''Model class for a commit.

    Reads the persisted attributes ``tree_id``, ``authored``, ``committed``,
    ``message``, ``parent_ids``, ``child_ids`` and ``repo_ids``.  ``repo`` is
    an in-memory-only attribute that must be supplied with ``set_context``
    (or discovered through ``guess_repo``) before repo-dependent methods
    are used.
    '''
    type_s = 'Commit'
    # Ephemeral attrs
    repo=None

    def set_context(self, repo):
        '''Attach the repository this commit is being viewed through.'''
        self.repo = repo

    @LazyProperty
    def author_url(self):
        '''URL of the user matching the author's email, or None if no user matches.'''
        u = User.by_email_address(self.authored.email)
        if u: return u.url()

    @LazyProperty
    def committer_url(self):
        '''URL of the user matching the committer's email, or None if no user matches.'''
        u = User.by_email_address(self.committed.email)
        if u: return u.url()

    @LazyProperty
    def tree(self):
        '''The root Tree of this commit, with its context set to this commit.

        If ``tree_id`` is unset, or no Tree is stored under it, the repo is
        asked to compute the tree.  Returns None when no tree can be found.
        '''
        if self.tree_id is None:
            self.tree_id = self.repo.compute_tree_new(self)
        if self.tree_id is None:
            return None
        t = Tree.query.get(_id=self.tree_id)
        if t is None:
            # tree_id is known but the tree document is missing: recompute
            self.tree_id = self.repo.compute_tree_new(self)
            t = Tree.query.get(_id=self.tree_id)
        if t is not None: t.set_context(self)
        return t

    @LazyProperty
    def summary(self):
        '''First line of the commit message, truncated to 50 characters.'''
        message = h.really_unicode(self.message)
        first_line = message.split('\n')[0]
        return h.text.truncate(first_line, 50)

    def shorthand_id(self):
        '''Short identifier supplied by the repo; falls back to repr(self)
        when no repo is set and none can be guessed.
        '''
        if self.repo is None: self.repo = self.guess_repo()
        if self.repo is None: return repr(self)
        return self.repo.shorthand_for_commit(self._id)

    @LazyProperty
    def symbolic_ids(self):
        '''Symbolic names for this commit as reported by the repo.

        Unlike ``url``/``shorthand_id`` this does not try ``guess_repo``;
        ``repo`` must already be set.
        '''
        return self.repo.symbolics_for_commit(self.legacy)

    def url(self):
        '''URL of this commit, or '#' when no repo is set and none can be guessed.'''
        if self.repo is None: self.repo = self.guess_repo()
        if self.repo is None: return '#'
        return self.repo.url_for_commit(self.legacy)

    def guess_repo(self):
        '''Search the current project's apps for a repo listed in ``repo_ids``.

        Returns the first matching repo, or None.  AttributeError raised
        while inspecting an app is deliberately ignored (presumably apps
        that have no ``repo`` attribute -- confirm).
        '''
        for ac in c.project.app_configs:
            try:
                app = c.project.app_instance(ac)
                if app.repo._id in self.repo_ids:
                    return app.repo
            except AttributeError:
                pass
        return None

    def link_text(self):
        '''The link text that will be used when a shortlink to this artifact
        is expanded into an <a></a> tag.

        By default this method returns shorthand_id(). Subclasses should
        override this method to provide more descriptive link text.
        '''
        return self.shorthand_id()

    def log_iter(self, skip, count):
        '''Yield Commit objects from this commit's log, QSIZE ids per query.

        The first ``skip`` ids of ``commitlog([self._id])`` are passed over,
        then at most ``count`` commits are yielded, each with its context
        set to ``self.repo``.  A falsy ``count`` yields nothing.

        Raises KeyError (from the generator) if a logged id has no commit
        document.
        '''
        for oids in utils.chunked_iter(commitlog([self._id]), QSIZE):
            if not count:
                # Nothing more can be yielded; don't query further chunks.
                return
            oids = list(oids)
            commits = dict(
                (ci._id, ci) for ci in self.query.find(dict(
                        _id={'$in': oids})))
            for oid in oids:
                if skip:
                    skip -= 1
                    continue
                if not count:
                    # Stop the whole generator, not just this chunk;
                    # otherwise every remaining chunk of history would
                    # still be fetched only to be discarded.
                    return
                count -= 1
                ci = commits[oid]
                ci.set_context(self.repo)
                yield ci

    def log(self, skip, count):
        '''List form of ``log_iter``.'''
        return list(self.log_iter(skip, count))

    def count_revisions(self):
        '''Number of commit ids produced by ``commitlog`` for this commit.'''
        return sum(1 for _ in commitlog([self._id]))

    def context(self):
        '''Return ``dict(prev=..., next=...)`` for navigation.

        ``prev`` is the first parent and ``next`` the first child, each
        None when absent.
        '''
        result = dict(prev=None, next=None)
        if self.parent_ids:
            result['prev'] = self.query.get(_id=self.parent_ids[0])
        if self.child_ids:
            result['next'] = self.query.get(_id=self.child_ids[0])
        return result
| |
class Tree(RepoObject):
    '''Model class for a directory tree.

    Reads the persisted attributes ``tree_ids``, ``blob_ids`` and
    ``other_ids``.  ``repo``, ``commit``, ``parent`` and ``name`` are
    in-memory-only and are filled in by ``set_context``.
    '''
    # Ephemeral attrs
    repo=None
    commit=None
    parent=None
    name=None

    def compute_hash(self):
        '''Compute a hash based on the contents of the tree.  Note that this
        hash does not necessarily correspond to any actual DVCS hash.
        '''
        lines = (
            [ 'tree' + x.name + x.id for x in self.tree_ids ]
            + [ 'blob' + x.name + x.id for x in self.blob_ids ]
            + [ x.type + x.name + x.id for x in self.other_ids ])
        sha_obj = sha1()
        # Sorted so the digest does not depend on the stored entry order
        for line in sorted(lines):
            sha_obj.update(line)
        return sha_obj.hexdigest()

    def __getitem__(self, name):
        '''Look up a direct child by name.

        Blob entries are returned as the dict from ``by_name``.  Anything
        else is loaded as a Tree (asking the repo to compute it when it is
        not stored) and returned with its context set to this tree.

        Raises KeyError if ``name`` is not an entry of this tree, or if the
        child tree cannot be loaded or computed.
        '''
        obj = self.by_name[name]
        if obj['type'] == 'blob': return obj
        obj = self.query.get(_id=obj['id'])
        if obj is None:
            oid = self.repo.compute_tree_new(self.commit, self.path() + name + '/')
            obj = self.query.get(_id=oid)
        if obj is None: raise KeyError(name)
        obj.set_context(self, name)
        return obj

    def set_context(self, commit_or_tree, name=None):
        '''Attach ephemeral context.

        With ``name``, ``commit_or_tree`` is the parent Tree and this tree
        is its child called ``name``.  Without it, ``commit_or_tree`` is the
        Commit this tree is the root of.
        '''
        assert commit_or_tree is not self
        self.repo = commit_or_tree.repo
        if name:
            self.commit = commit_or_tree.commit
            self.parent = commit_or_tree
            self.name = name
        else:
            self.commit = commit_or_tree

    def readme(self):
        '''returns (filename, unicode text) if a readme file is found

        The first blob whose name matches README_RE wins; returns None when
        no blob matches.
        '''
        for x in self.blob_ids:
            if README_RE.match(x.name):
                obj = Object(
                    object_id=x.id,
                    path=lambda:self.path() + x['name'],
                    commit=Object(
                        object_id=self.commit._id))
                text = self.repo.open_blob(obj).read()
                return (x.name, h.really_unicode(text))
        return None

    def ls(self):
        '''List this tree's entries as dicts for display.

        Each dict has ``kind`` ('DIR', 'FILE', or the entry's type for
        other objects), ``name``, ``href`` and ``last_commit``.  Entries are
        grouped as trees, blobs, others and sorted by name within a group.
        '''
        # Load last commit info
        oids = [ x.id for x in chain(self.tree_ids, self.blob_ids, self.other_ids) ]
        lc_index = dict(
            (lc.object_id, lc.commit_info)
            for lc in LastCommitDoc.m.find(dict(
                    repo_id=self.repo._id,
                    object_id={'$in': oids})))
        results = []
        def _get_last_commit(oid):
            # Stored commit info, or a fresh all-None placeholder with the
            # same keys so consumers need not special-case missing data
            lc = lc_index.get(oid)
            if lc is None:
                lc = dict(
                    author=None,
                    author_email=None,
                    author_url=None,
                    date=None,
                    id=None,
                    href=None,
                    shortlink=None,
                    summary=None)
            return lc
        for x in sorted(self.tree_ids, key=lambda x:x.name):
            results.append(dict(
                    kind='DIR',
                    name=x.name,
                    href=x.name + '/',
                    last_commit=_get_last_commit(x.id)))
        for x in sorted(self.blob_ids, key=lambda x:x.name):
            # NOTE(review): FILE hrefs get the same trailing '/' as DIR
            # hrefs -- confirm this is intended before relying on it.
            results.append(dict(
                    kind='FILE',
                    name=x.name,
                    href=x.name + '/',
                    last_commit=_get_last_commit(x.id)))
        for x in sorted(self.other_ids, key=lambda x:x.name):
            results.append(dict(
                    kind=x.type,
                    name=x.name,
                    href=None,
                    last_commit=_get_last_commit(x.id)))
        return results

    def path(self):
        '''Slash-delimited path of this tree from the root; always starts
        and ends with '/' (the root itself is '/').
        '''
        if self.parent:
            assert self.parent is not self
            return self.parent.path() + self.name + '/'
        else:
            return '/'

    def url(self):
        '''URL of this tree: the commit URL + 'tree' + ``path()``.'''
        return self.commit.url() + 'tree' + self.path()

    @LazyProperty
    def by_name(self):
        '''Mapping of entry name -> entry dict carrying a ``type`` key.

        On a name clash, blob entries override tree entries, which in turn
        override "other" entries.
        '''
        d = dict((x.name, x) for x in self.other_ids)
        d.update(
            (x.name, dict(x, type='tree'))
            for x in self.tree_ids)
        d.update(
            (x.name, dict(x, type='blob'))
            for x in self.blob_ids)
        return d

    def is_blob(self, name):
        '''True if the entry called ``name`` is a blob; KeyError if absent.'''
        return self.by_name[name]['type'] == 'blob'
| |
# Bind each model class to its document collection on the repository ORM
# session.  The loop temporaries are removed afterwards so the module
# namespace ends up exactly as with two explicit mapper() calls.
for _mapped_cls, _doc in ((Commit, CommitDoc), (Tree, TreeDoc)):
    mapper(_mapped_cls, _doc, repository_orm_session)
del _mapped_cls, _doc
| |
def commitlog(commit_ids, skip=0, limit=sys.maxint):
    '''Generate the ids of the commits reachable from ``commit_ids``.

    The commit graph is assembled from CommitRunDoc documents immediately,
    when this function is called; the returned generator then walks that
    in-memory graph lazily.

    :param commit_ids: ids of the commits to start logging from
    :param skip: number of leading ids to pass over without yielding
    :param limit: maximum number of ids to yield
    :returns: a generator of commit ids; a commit is only produced once all
        of its known children have been, and among eligible commits the
        most recent (by run commit time) comes first, except that a parent
        that has just become eligible is preferred
    '''
    seen = set()
    ci_times = {}
    ci_parents = {}
    ci_children = defaultdict(set)

    def _visit(commit_id):
        # Record times and parents for commit_id and everything reachable
        # from it.  An explicit stack is used instead of recursion so that
        # a long chain of runs cannot exceed the interpreter recursion limit.
        pending = [commit_id]
        while pending:
            current = pending.pop()
            if current in seen: continue
            run = CommitRunDoc.m.get(commit_ids=current)
            if run is None: continue
            found = False
            for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
                # Ignore the part of the run that precedes `current`
                if oid == current: found = True
                elif not found: continue
                seen.add(oid)
                ci_times[oid] = time
                if pos+1 < len(run.commit_ids):
                    # Inside a run each commit's only parent is the next entry
                    ci_parents[oid] = [ run.commit_ids[pos+1] ]
                else:
                    # The last commit of the run links to the run's parents
                    ci_parents[oid] = run.parent_commit_ids
            # Pushed in reverse so parents are popped in their stored order,
            # matching a depth-first recursive traversal
            pending.extend(reversed(run.parent_commit_ids))

    def _gen_ids(commit_ids, skip, limit):
        # Traverse the graph in topo order, yielding commit IDs
        commits = set(commit_ids)
        new_parent = None
        while commits and limit:
            # next commit is latest commit that's valid to log
            if new_parent in commits:
                ci = new_parent
            else:
                ci = max(commits, key=lambda ci:ci_times[ci])
            commits.remove(ci)
            if skip:
                skip -= 1
                continue
            else:
                limit -= 1
                yield ci
            # remove this commit from its parents children and add any childless
            # parents to the 'ready set'
            new_parent = None
            for oid in ci_parents[ci]:
                children = ci_children[oid]
                children.discard(ci)
                if not children:
                    commits.add(oid)
                    new_parent = oid

    # Load all the runs to build a commit graph
    log.info('Build commit graph')
    for cid in commit_ids:
        _visit(cid)
    # Invert the parent links so _gen_ids can tell when a parent has no
    # unlogged children left
    for oid, parents in ci_parents.iteritems():
        for ci_parent in parents:
            ci_children[ci_parent].add(oid)

    return _gen_ids(commit_ids, skip, limit)