Add GitHub Issues scanner
diff --git a/README.md b/README.md
index e6d8bef..371276c 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,9 @@
## Currently available scanner plugins:
- Apache Pony Mail
- - GNU Mailman Pipermail
- Atlassian JIRA
+ - GitHub issues/PRs
+ - GNU Mailman Pipermail
-
+
diff --git a/src/plugins/scanners/__init__.py b/src/plugins/scanners/__init__.py
index aea84e1..e3e5c58 100644
--- a/src/plugins/scanners/__init__.py
+++ b/src/plugins/scanners/__init__.py
@@ -26,7 +26,8 @@
__all__ = [
'jira',
'ponymail',
- 'pipermail'
+ 'pipermail',
+ 'github-issues'
]
# Import each plugin into a hash called 'scanners'
diff --git a/src/plugins/scanners/github-issues.py b/src/plugins/scanners/github-issues.py
new file mode 100644
index 0000000..b50efde
--- /dev/null
+++ b/src/plugins/scanners/github-issues.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3.4
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+ #the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import hashlib
+from dateutil import parser
+import time
+import requests
+import plugins.utils.github
+
+title = "Scanner for GitHub Issues"
+version = "0.1.0"
+
+def accepts(source):
+ """ Return true if this is a github repo """
+ if source['type'] == 'github':
+ return True
+ if source['type'] == 'git' and re.match(r"https://(?:www\.)?github.com/"):
+ return True
+ return False
+
+def format_date(d, epoch=False):
+ if not d:
+ return
+ parsed = parser.parse(d)
+
+ if epoch:
+ return time.mktime(parsed.timetuple())
+
+ return time.strftime("%Y/%m/%d %H:%M:%S", parsed.timetuple())
+
+def make_hash(source, issue):
+ return hashlib.sha224(("%s-%s-%s" % (source['organisation'],
+ source['sourceID'],
+ str(issue['id']))).encode('ascii',
+ errors='replace')).hexdigest()
+
+def make_issue(source, issue, people):
+
+ key = str(issue['number'])
+ dhash = make_hash(source, issue)
+
+ closed_date = issue.get('closed_at', None)
+
+ owner_email = people[issue['user']['login']]['email']
+
+ issue_closer = owner_email
+ if 'closed_by' in issue:
+ issue_closer = people[issue['closed_by']['login']]
+
+ return {
+ 'id': dhash,
+ 'key': key,
+ 'organisation': source['organisation'],
+ 'sourceID': source['sourceID'],
+ 'url': issue['html_url'],
+ 'status': issue['state'],
+ 'created': format_date(issue['created_at'], epoch=True),
+ 'closed': format_date(closed_date, epoch=True),
+ 'issueCloser': issue_closer,
+ 'createdDate': format_date(issue['created_at']),
+ 'closedDate': format_date(closed_date),
+ 'changeDate': format_date(closed_date
+ if closed_date
+ else issue['updated_at']),
+ 'assignee': owner_email,
+ 'issueCreator': owner_email,
+ 'comments': issue['comments'],
+ 'title': issue['title']
+ }
+
+def make_person(source, issue, raw_person):
+ email = raw_person['email']
+ if not email:
+ email = "%s@invalid.github.com" % issue['user']['login']
+
+ name = raw_person['name']
+ if not name:
+ name = raw_person['login']
+
+ id = hashlib.sha1(("%s%s" % (source['organisation'],
+ email)).encode('ascii',
+ errors='replace')).hexdigest()
+
+ return {'email': email, 'id': id, 'organisation': source['organisation'],
+ 'name': name}
+
+def status_changed(stored_issue, issue):
+ return stored_issue['status'] != issue['status']
+
+def update_issue(KibbleBit, issue):
+ KibbleBit.append('issue', issue['id'], issue)
+
+def update_person(KibbleBit, person):
+ KibbleBit.append('person', { 'doc': person, 'doc_as_upsert': True})
+
+
+def scan(KibbleBit, source):
+ auth=None
+ people = {}
+ if 'creds' in source:
+ KibbleBit.pprint("Using auth for repo %s" % source['sourceURL'])
+ creds = source['creds']
+ if creds and 'username' in creds:
+ auth = (creds['username'], creds['password'])
+
+ try:
+ issues = plugins.utils.github.get_all(source, plugins.utils.github.issues,
+ params={'filter': 'all', 'state':'all'},
+ auth=auth)
+ KibbleBit.pprint("Fetched %s issues for %s" %(str(len(issues)), source['sourceURL']))
+
+ for issue in issues:
+
+ if not issue['user']['login'] in people:
+ person = make_person(source, issue, plugins.utils.github.user(issue['user']['url'],
+ auth=auth))
+ people[issue['user']['login']] = person
+ update_person(KibbleBit, person)
+
+ if 'closed_by' in issue and not issue['closed_by']['login'] in people:
+ closer = make_person(source, issue, plugins.utils.github.user(issue['closed_by']['url'],
+ auth=auth))
+ people[issue['closed_by']['login']] = closer
+ update_person(KibbleBit, closer)
+
+ doc = make_issue(source, issue, people)
+ dhash = doc['id']
+
+ stored_change = None
+ if KibbleBit.exists('issue', dhash):
+ es_doc = KibbleBit.get('issue', dhash)
+ if not status_changed(es_doc, doc):
+ KibbleBit.pprint("change %s seen already and status unchanged. Skipping." % issue['id'])
+ continue
+
+ update_issue(KibbleBit, doc)
+
+ except requests.HTTPError as e:
+ # we've likely hit our GH API quota for the hour, so we re-try
+ KibbleBit.pprint("HTTP Error, rate limit exceeded?")
+ time.sleep(3600)
diff --git a/src/plugins/utils/github.py b/src/plugins/utils/github.py
new file mode 100644
index 0000000..b037bc9
--- /dev/null
+++ b/src/plugins/utils/github.py
@@ -0,0 +1,56 @@
+#!env /usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+ #the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" GitHub utility library """
+import re
+import requests
+from json import loads
+
+repo_pattern = re.compile('.*[:/]([^/]+)/([^/]+).git')
+issues_api = "https://api.github.com/repos/%s/%s/issues"
+
+def issues(source, params={}, auth=None):
+ local_params = {'per_page': 100, 'page': 1}
+ local_params.update(params)
+
+ repo_user = repo_pattern.findall(source['sourceURL'])[0]
+ resp = requests.get(issues_api % repo_user, params=local_params, auth=auth)
+ resp.raise_for_status()
+
+ return resp.json()
+
+def user(user_url, auth=None):
+ resp = requests.get(user_url, auth=auth)
+ resp.raise_for_status()
+
+ return resp.json()
+
+def get_all(source, f, params={}, auth=None):
+ acc = []
+ page = params.get('page', 1)
+
+ while True:
+ items = f(source, params=params, auth=auth)
+ if not items:
+ break
+
+ acc.extend(items)
+
+ page = page + 1
+ params.update({"page": page})
+
+ return acc