Add GitHub Issues scanner
diff --git a/README.md b/README.md index e6d8bef..371276c 100644 --- a/README.md +++ b/README.md
@@ -22,8 +22,9 @@ ## Currently available scanner plugins: - Apache Pony Mail - - GNU Mailman Pipermail - Atlassian JIRA + - GitHub issues/PRs + - GNU Mailman Pipermail - +
diff --git a/src/plugins/scanners/__init__.py b/src/plugins/scanners/__init__.py index aea84e1..e3e5c58 100644 --- a/src/plugins/scanners/__init__.py +++ b/src/plugins/scanners/__init__.py
@@ -26,7 +26,8 @@ __all__ = [ 'jira', 'ponymail', - 'pipermail' + 'pipermail', + 'github-issues' ] # Import each plugin into a hash called 'scanners'
diff --git a/src/plugins/scanners/github-issues.py b/src/plugins/scanners/github-issues.py new file mode 100644 index 0000000..b50efde --- /dev/null +++ b/src/plugins/scanners/github-issues.py
#!/usr/bin/env python3.4
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Scanner plugin that stores GitHub issues and the people behind them. """

import re
import hashlib
from dateutil import parser
import time
import requests
import plugins.utils.github

title = "Scanner for GitHub Issues"
version = "0.1.0"

# Compiled once; the dot in "github.com" is escaped so it only matches a dot.
_GITHUB_HTTPS_URL = re.compile(r"https://(?:www\.)?github\.com/")


def accepts(source):
    """ Return true if this is a github repo.

    A source is accepted when its type is 'github', or when its type is
    'git' and its 'sourceURL' starts with an https GitHub address.
    """
    if source['type'] == 'github':
        return True
    if source['type'] == 'git':
        # The URL has to be handed to the matcher; calling re.match() with
        # only a pattern raises TypeError.
        return bool(_GITHUB_HTTPS_URL.match(source.get('sourceURL') or ''))
    return False


def format_date(d, epoch=False):
    """ Convert a date string into the representation stored with an issue.

    :param d: date string understood by dateutil, or a falsy value.
    :param epoch: when true, return seconds since the epoch (float);
                  otherwise return a "%Y/%m/%d %H:%M:%S" string.
    :return: None when d is falsy, else the converted value.
    """
    if not d:
        return None
    parsed = parser.parse(d)

    if epoch:
        # datetime.timestamp() honours the parsed UTC offset.  Feeding the
        # time tuple to time.mktime() would re-interpret it as local time
        # and skew the result by the host's UTC offset.  Microseconds are
        # dropped to keep the whole-second values mktime() used to produce.
        return parsed.replace(microsecond=0).timestamp()

    return time.strftime("%Y/%m/%d %H:%M:%S", parsed.timetuple())


def make_hash(source, issue):
    """ Return the SHA-224 hex digest identifying one issue of one source.

    The digest covers the source's organisation, its sourceID and the
    issue's 'id' field.
    """
    seed = "%s-%s-%s" % (source['organisation'],
                         source['sourceID'],
                         str(issue['id']))
    return hashlib.sha224(seed.encode('ascii', errors='replace')).hexdigest()


def make_issue(source, issue, people):
    """ Build the document stored for a single issue.

    :param source: source dict (reads 'organisation' and 'sourceID').
    :param issue: issue dict as returned by the GitHub API.
    :param people: mapping of GitHub login -> person dict (see make_person);
                   must already hold the issue author and, if present,
                   the closer.
    :return: dict describing the issue.
    """
    key = str(issue['number'])
    dhash = make_hash(source, issue)

    closed_date = issue.get('closed_at', None)

    owner_email = people[issue['user']['login']]['email']

    # 'closed_by' may be absent or null.  Store the closer's e-mail so the
    # field has the same type (a string) as the author fallback.
    closed_by = issue.get('closed_by')
    issue_closer = owner_email
    if closed_by:
        issue_closer = people[closed_by['login']]['email']

    return {
        'id': dhash,
        'key': key,
        'organisation': source['organisation'],
        'sourceID': source['sourceID'],
        'url': issue['html_url'],
        'status': issue['state'],
        'created': format_date(issue['created_at'], epoch=True),
        'closed': format_date(closed_date, epoch=True),
        'issueCloser': issue_closer,
        'createdDate': format_date(issue['created_at']),
        'closedDate': format_date(closed_date),
        'changeDate': format_date(closed_date
                                  if closed_date
                                  else issue['updated_at']),
        # NOTE(review): the assignee is recorded as the issue author, not
        # the issue's own assignee - confirm this is intended.
        'assignee': owner_email,
        'issueCreator': owner_email,
        'comments': issue['comments'],
        'title': issue['title']
    }


def make_person(source, issue, raw_person):
    """ Build the document stored for a GitHub user.

    :param source: source dict (reads 'organisation').
    :param issue: issue the person was found on; kept for interface
                  compatibility, no longer read.
    :param raw_person: user dict as returned by the GitHub API.
    :return: dict with 'email', 'id', 'organisation' and 'name'.
    """
    email = raw_person.get('email')
    if not email:
        # Derive the placeholder from the person's own login.  Using the
        # issue author's login would give a closer without an e-mail the
        # same address (and id) as the author.
        email = "%s@invalid.github.com" % raw_person['login']

    name = raw_person.get('name')
    if not name:
        name = raw_person['login']

    person_id = hashlib.sha1(("%s%s" % (source['organisation'],
                                        email)).encode('ascii',
                                                       errors='replace')).hexdigest()

    return {'email': email, 'id': person_id,
            'organisation': source['organisation'], 'name': name}


def status_changed(stored_issue, issue):
    """ Return true if the two issue documents differ in 'status'. """
    return stored_issue['status'] != issue['status']


def update_issue(KibbleBit, issue):
    """ Queue an issue document through KibbleBit.append. """
    KibbleBit.append('issue', issue['id'], issue)


def update_person(KibbleBit, person):
    """ Queue a person document through KibbleBit.append. """
    # NOTE(review): update_issue passes (type, id, doc) while this passes
    # (type, doc) - confirm which call shape KibbleBit.append expects.
    KibbleBit.append('person', {'doc': person, 'doc_as_upsert': True})


def scan(KibbleBit, source):
    """ Fetch every issue of the source's repository and store it.

    Each issue author and closer is looked up through the GitHub API once
    per login and stored as a person.  Issues that are already stored with
    an unchanged status are skipped.

    :param KibbleBit: scanner context (uses pprint, exists, get, append).
    :param source: source dict; reads 'sourceURL', 'organisation',
                   'sourceID' and, optionally, 'creds'.
    """
    auth = None
    people = {}  # GitHub login -> person document, one API lookup per login
    creds = source.get('creds')
    if creds and 'username' in creds:
        KibbleBit.pprint("Using auth for repo %s" % source['sourceURL'])
        auth = (creds['username'], creds['password'])

    try:
        issues = plugins.utils.github.get_all(source, plugins.utils.github.issues,
                                              params={'filter': 'all', 'state': 'all'},
                                              auth=auth)
        KibbleBit.pprint("Fetched %s issues for %s" % (str(len(issues)), source['sourceURL']))

        for issue in issues:

            author = issue['user']
            if author['login'] not in people:
                person = make_person(source, issue,
                                     plugins.utils.github.user(author['url'], auth=auth))
                people[author['login']] = person
                update_person(KibbleBit, person)

            # 'closed_by' may be missing or null.
            closed_by = issue.get('closed_by')
            if closed_by and closed_by['login'] not in people:
                closer = make_person(source, issue,
                                     plugins.utils.github.user(closed_by['url'], auth=auth))
                people[closed_by['login']] = closer
                update_person(KibbleBit, closer)

            doc = make_issue(source, issue, people)
            dhash = doc['id']

            if KibbleBit.exists('issue', dhash):
                es_doc = KibbleBit.get('issue', dhash)
                if not status_changed(es_doc, doc):
                    KibbleBit.pprint("change %s seen already and status unchanged. Skipping." % issue['id'])
                    continue

            update_issue(KibbleBit, doc)

    except requests.HTTPError:
        # We have likely hit the GitHub API quota for the hour.  Note that
        # this only pauses before returning; the interrupted scan is not
        # re-run here.
        KibbleBit.pprint("HTTP Error, rate limit exceeded?")
        time.sleep(3600)
diff --git a/src/plugins/utils/github.py b/src/plugins/utils/github.py new file mode 100644 index 0000000..b037bc9 --- /dev/null +++ b/src/plugins/utils/github.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" GitHub utility library """
import re
import requests
from json import loads

# Captures (owner, repository) from the tail of a repository URL.  The
# ".git" suffix and a trailing slash are optional, so both
# "https://github.com/owner/repo.git" and "https://github.com/owner/repo"
# are understood, as is the "git@github.com:owner/repo.git" form.
repo_pattern = re.compile(r'.*[:/]([^/]+)/([^/]+?)(?:\.git)?/?$')
issues_api = "https://api.github.com/repos/%s/%s/issues"


def issues(source, params=None, auth=None):
    """ Fetch one page of issues for the repository behind source.

    :param source: source dict; 'sourceURL' names the repository.
    :param params: extra query parameters; they override the defaults
                   of 100 items per page and page 1.
    :param auth: value passed to requests as `auth`, or None.
    :return: decoded JSON body of the response.
    :raises ValueError: if owner/repository cannot be read from the URL.
    :raises requests.HTTPError: on a non-success HTTP status.
    """
    local_params = {'per_page': 100, 'page': 1}
    if params:
        local_params.update(params)

    found = repo_pattern.findall(source['sourceURL'])
    if not found:
        raise ValueError("Cannot determine GitHub owner/repository from %r"
                         % source['sourceURL'])
    repo_user = found[0]
    resp = requests.get(issues_api % repo_user, params=local_params, auth=auth)
    resp.raise_for_status()

    return resp.json()


def user(user_url, auth=None):
    """ Fetch the user document found at user_url.

    :param user_url: API URL of the user.
    :param auth: value passed to requests as `auth`, or None.
    :return: decoded JSON body of the response.
    :raises requests.HTTPError: on a non-success HTTP status.
    """
    resp = requests.get(user_url, auth=auth)
    resp.raise_for_status()

    return resp.json()


def get_all(source, f, params=None, auth=None):
    """ Collect the items of every page that f returns.

    f is called as f(source, params=..., auth=...) with an increasing
    'page' parameter until it returns an empty result.

    :param source: passed through to f unchanged.
    :param f: page fetcher, e.g. issues.
    :param params: base query parameters; 'page', if given, is the first
                   page requested.  The dict is never modified.
    :param auth: passed through to f unchanged.
    :return: list with the items of all pages, in order.
    """
    # Work on a copy.  Writing the page counter into the caller's dict (or
    # into a shared default) would make the next call start past the end.
    base = dict(params) if params else {}
    page = base.get('page', 1)
    acc = []

    while True:
        # A fresh dict per call, so f may keep or modify what it receives.
        items = f(source, params=dict(base, page=page), auth=auth)
        if not items:
            break

        acc.extend(items)
        page += 1

    return acc