blob: a5e3ddbc3c3e38350ed4f16b3a408a8784abe8e5 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import unicode_literals
from __future__ import absolute_import
import re
import logging
import json
import time
import six.moves.urllib.request, six.moves.urllib.error, six.moves.urllib.parse
from datetime import datetime
from tg import config, session, redirect, request, expose
from tg.decorators import without_trailing_slash
from tg import tmpl_context as c
from requests_oauthlib import OAuth2Session
import requests
from formencode import validators as fev
from forgeimporters import base
log = logging.getLogger(__name__)
class GitHubProjectNameValidator(fev.FancyValidator):
    """Form validator for the name of a GitHub project to import.

    A name is accepted when it consists only of letters, digits, dashes,
    underscores and periods, and when ``GitHubProjectExtractor`` reports the
    ``<user>/<project>`` repository as readable for the current user.
    """

    not_empty = True
    messages = {
        'invalid': 'Valid symbols are: letters, numbers, dashes, '
                   'underscores and periods',
        'unavailable': 'This project is unavailable for import',
    }

    def _to_python(self, value, state=None):
        """Return the stripped project name, or raise ``fev.Invalid``.

        The owning GitHub user is read from ``state.full_dict``:
        ``gh_user_name`` wins over ``user_name``, and an empty string is used
        when neither key is present.
        """
        submitted = state.full_dict
        owner = submitted.get(
            'gh_user_name', submitted.get('user_name', '')).strip()
        name = value.strip()

        # Syntax check first, so no request is made for malformed names.
        if re.match(r'^[a-zA-Z0-9-_.]+$', name) is None:
            raise fev.Invalid(self.message('invalid', state), value, state)

        extractor = GitHubProjectExtractor(
            '%s/%s' % (owner, name), user=c.user)
        if extractor.check_readable():
            return name
        raise fev.Invalid(self.message('unavailable', state), value, state)
class GitHubProjectExtractor(base.ProjectExtractor):
    """Project extractor that reads repository data from the GitHub API.

    On top of ``base.ProjectExtractor`` this class adds:

    * optional token authentication (token stored in the user's
      ``GitHubProjectImport`` tool data),
    * waiting for the rate limit to reset when GitHub reports that no
      requests remain,
    * following ``Link: <...>; rel="next"`` headers so that ``get_page``
      returns all pages of a paginated listing.
    """

    PAGE_MAP = {
        'project_info': 'https://api.github.com/repos/{project_name}',
        'issues': 'https://api.github.com/repos/{project_name}/issues',
        'wiki_url': 'https://github.com/{project_name}.wiki',
    }
    POSSIBLE_STATES = ('open', 'closed')
    SUPPORTED_ISSUE_EVENTS = ('closed', 'reopened', 'assigned')
    NEXT_PAGE_URL_RE = re.compile(r'<([^>]*)>; rel="next"')

    def __init__(self, *args, **kw):
        """Accept an optional ``user`` keyword whose stored token is used for auth.

        All other arguments are passed through to the base class.
        """
        self.token = None
        user = kw.pop('user', None)
        if user:
            self.token = user.get_tool_data('GitHubProjectImport', 'token')
        super(GitHubProjectExtractor, self).__init__(*args, **kw)

    def add_token(self, url):
        """Return ``(url, headers)`` where headers carry the token, if any.

        The url is returned unchanged; ``headers`` is a fresh dict that is
        empty when no token is available.
        """
        headers = {}
        if self.token:
            headers['Authorization'] = 'token {}'.format(self.token)
        return url, headers

    def wait_for_limit_reset(self, headers):
        """Sleep until the time given by the ``X-RateLimit-Reset`` header.

        :param headers: response headers containing ``X-RateLimit-Reset``
            (epoch seconds) and ``X-RateLimit-Limit``.
        """
        reset = headers.get('X-RateLimit-Reset')
        limit = headers.get('X-RateLimit-Limit')
        reset = datetime.utcfromtimestamp(int(reset))
        now = datetime.utcnow()
        log.warning('Rate limit exceeded (%s requests/hour). '
                    'Sleeping until %s UTC', limit, reset)
        # The reset moment may already have passed (clock skew, slow
        # request); time.sleep() raises ValueError for negative values.
        time.sleep(max(0, (reset - now).total_seconds()))

    def urlopen(self, url, headers=None, **kw):
        """Open ``url`` with token auth, retrying after a rate-limit wait.

        :param url: URL to open.
        :param headers: optional dict of extra request headers.
        :param kw: passed through to the base class ``urlopen``.
        :raises: ``HTTPError`` for any error other than an exhausted rate
            limit (403 with ``X-RateLimit-Remaining: 0``).
        """
        if headers is None:
            headers = {}
        url, auth_headers = self.add_token(url)
        try:
            # need to use unredirected_hdrs for Authorization for APIs that redirect to an AWS file asset which has
            # separate authentication added automatically
            resp = super(GitHubProjectExtractor, self).urlopen(
                url, headers=headers, unredirected_hdrs=auth_headers, **kw)
        except six.moves.urllib.error.HTTPError as e:
            # GitHub will return 403 if rate limit exceeded.
            # We're checking for limit on every request below, but we still
            # can get 403 if other import task exceeds the limit before.
            if e.code == 403 and e.info().get('X-RateLimit-Remaining') == '0':
                self.wait_for_limit_reset(e.info())
                # Retry with the caller's headers, not a blank set.
                return self.urlopen(url, headers=headers, **kw)
            raise
        remain = resp.info().get('X-RateLimit-Remaining')
        if remain and int(remain) == 0:
            self.wait_for_limit_reset(resp.info())
            return self.urlopen(url, headers=headers, **kw)
        return resp

    def check_readable(self):
        """Return True if a HEAD request for the project info URL gives 200."""
        url, headers = self.add_token(self.get_page_url('project_info'))
        headers['User-Agent'] = 'Allura Data Importer (https://allura.apache.org/)'
        resp = requests.head(url, headers=headers, timeout=10)
        return resp.status_code == 200

    def get_next_page_url(self, link):
        """Extract the ``rel="next"`` URL from a ``Link`` header value.

        Returns None when ``link`` is empty or has no ``rel="next"`` entry.
        """
        if not link:
            return None
        # search(), not match(): the "next" entry is not guaranteed to be the
        # first one in the header (e.g. rel="prev" can precede it).
        m = self.NEXT_PAGE_URL_RE.search(link)
        return m.group(1) if m else None

    def parse_page(self, page):
        """Decode a JSON response and return ``(data, next_page_url)``."""
        # Look at link header to handle pagination
        link = page.info().get('Link')
        next_page_url = self.get_next_page_url(link)
        return json.loads(page.read().decode('utf8')), next_page_url

    def get_page(self, page_name_or_url, **kw):
        """Fetch a page and, if it is paginated, all following pages.

        The base class ``get_page`` is expected to return a
        ``(data, next_page_url)`` pair, which is the shape ``parse_page``
        produces. Data from subsequent pages is appended to the data of the
        first page. The combined result is stored in ``self.page`` and
        returned.
        """
        page, next_page_url = super(GitHubProjectExtractor, self).get_page(
            page_name_or_url, **kw)
        if next_page_url:
            # Work on a copy: the object returned by the base class may be
            # shared (e.g. cached there), and extending it in place would
            # make a later call for the same URL see duplicated items.
            page = list(page)
            while next_page_url:
                more, next_page_url = super(
                    GitHubProjectExtractor, self).get_page(next_page_url, **kw)
                page.extend(more)
        self.page = page
        return self.page

    def get_summary(self):
        """Return the ``description`` field of the project info."""
        return self.get_page('project_info').get('description')

    def get_homepage(self):
        """Return the ``homepage`` field of the project info."""
        return self.get_page('project_info').get('homepage')

    def get_repo_url(self):
        """Return the ``clone_url`` field of the project info."""
        return self.get_page('project_info').get('clone_url')

    def iter_issues(self):
        """Yield ``(number, issue)`` for every issue, ordered by number."""
        # github api doesn't allow getting closed and opened tickets in one
        # query
        issues = []
        base_url = self.get_page_url('issues')
        for state in self.POSSIBLE_STATES:
            # Plain concatenation: no str.format() on the URL, so braces in
            # it can never be misread as format fields.
            issues += self.get_page(base_url + '?state=' + state)
        issues.sort(key=lambda x: x['number'])
        for issue in issues:
            yield (issue['number'], issue)

    def iter_comments(self, issue):
        """Yield every comment found at the issue's ``comments_url``."""
        comments_url = issue['comments_url']
        comments = self.get_page(comments_url)
        for comment in comments:
            yield comment

    def iter_events(self, issue):
        """Yield the issue's events whose type is in SUPPORTED_ISSUE_EVENTS."""
        events_url = issue['events_url']
        events = self.get_page(events_url)
        for event in events:
            if event.get('event') in self.SUPPORTED_ISSUE_EVENTS:
                yield event

    def has_wiki(self):
        """Return the ``has_wiki`` field of the project info."""
        return self.get_page('project_info').get('has_wiki')

    def has_tracker(self):
        """Return the ``has_issues`` field of the project info."""
        return self.get_page('project_info').get('has_issues')
def oauth_app_basic_auth(config):
    """Build HTTP basic auth from the OAuth app's client id and secret.

    Both ``github_importer.client_id`` and ``github_importer.client_secret``
    must be present in ``config``; a missing key raises ``KeyError``.
    """
    credentials = [
        config[key]
        for key in ('github_importer.client_id',
                    'github_importer.client_secret')
    ]
    return requests.auth.HTTPBasicAuth(*credentials)
def valid_access_token(access_token):
    """Return True when the token check request answers with HTTP 200."""
    response = access_token_details(access_token)
    return response.status_code == 200
def access_token_details(access_token):
    """POST the token to GitHub's "check a token" endpoint.

    The request is authenticated with the OAuth app's client id and secret
    and uses a 10 second timeout. The ``requests`` response is returned
    as is; callers inspect its status code and JSON body.
    """
    # https://developer.github.com/v3/apps/oauth_applications/#check-a-token
    endpoint = 'https://api.github.com/applications/{}/token'.format(
        config['github_importer.client_id'])
    payload = {'access_token': access_token}
    return requests.post(
        endpoint,
        auth=oauth_app_basic_auth(config),
        timeout=10,
        json=payload,
    )
class GitHubOAuthMixin(object):
    '''
    Support for github oauth web application flow.  This is an "OAuth App" not a "GitHub App"

    The token obtained through the flow is stored in the user's
    ``GitHubProjectImport`` tool data under ``token``.
    '''

    def oauth_begin(self, scope=None):
        """Start the OAuth flow unless the user already has a valid token.

        Does nothing when the ``github_importer.*`` settings are missing or
        when the stored token is still valid. Otherwise the OAuth state and
        the current URL are saved in the session and the user is redirected
        to GitHub's authorization page.

        :param scope: scope passed to ``OAuth2Session``.
        """
        client_id = config.get('github_importer.client_id')
        secret = config.get('github_importer.client_secret')
        if not client_id or not secret:
            log.warning('github_importer.* not set up in .ini file; cannot use OAuth for GitHub')
            return  # GitHub app is not configured
        access_token = c.user.get_tool_data('GitHubProjectImport', 'token')
        if access_token and valid_access_token(access_token):
            return
        redirect_uri = request.url.rstrip('/') + '/oauth_callback'
        oauth = OAuth2Session(client_id, redirect_uri=redirect_uri, scope=scope)
        auth_url, state = oauth.authorization_url(
            'https://github.com/login/oauth/authorize')
        # Used in callback to prevent CSRF
        session['github.oauth.state'] = state
        session['github.oauth.redirect'] = request.url
        session.save()
        redirect(auth_url)

    @without_trailing_slash
    @expose()
    def oauth_callback(self, **kw):
        """Finish the OAuth flow: exchange the code for a token and store it.

        After the token is saved, ``oauth_callback_complete`` is called and
        the user is redirected to the URL remembered by ``oauth_begin``
        (``/`` when the session holds none).
        """
        client_id = config.get('github_importer.client_id')
        secret = config.get('github_importer.client_secret')
        if not client_id or not secret:
            return  # GitHub app is not configured
        oauth = OAuth2Session(
            client_id, state=session.get('github.oauth.state'))
        token = oauth.fetch_token(
            'https://github.com/login/oauth/access_token',
            client_secret=secret,
            authorization_response=request.url
        )
        c.user.set_tool_data('GitHubProjectImport',
                             token=token['access_token'])
        self.oauth_callback_complete()
        redirect(session.get('github.oauth.redirect', '/'))

    def oauth_callback_complete(self):
        """Subclasses can implement this to perform additional actions when
        token is retrieved"""
        pass

    def oauth_has_access(self, scope):
        """Return True if the user's stored token includes ``scope``.

        Returns False when ``scope`` is empty, when no token is stored, or
        when the token check response carries no usable ``scopes`` list
        (for example an error body for a revoked token).
        """
        if not scope:
            return False
        token = c.user.get_tool_data('GitHubProjectImport', 'token')
        if not token:
            return False
        r = access_token_details(token)
        try:
            details = r.json()
        except ValueError:
            # Body is not JSON, so there is nothing to read scopes from.
            return False
        # Error responses have no 'scopes' key; treat that as "no access"
        # instead of raising KeyError.
        scopes = details.get('scopes') if isinstance(details, dict) else None
        return scope in (scopes or ())