# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re
import urllib
import logging
from urlparse import urlparse, urljoin
from collections import defaultdict
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from BeautifulSoup import BeautifulSoup

from allura import model as M
from forgeimporters.base import ProjectExtractor

log = logging.getLogger(__name__)


class GoogleCodeProjectExtractor(ProjectExtractor):
    BASE_URL = 'http://code.google.com'
    RE_REPO_TYPE = re.compile(r'(svn|hg|git)')

    PAGE_MAP = {
        'project_info': BASE_URL + '/p/%s/',
        'source_browse': BASE_URL + '/p/%s/source/browse/',
    }

    LICENSE_MAP = defaultdict(lambda: 'Other/Proprietary License', {
        'Apache License 2.0': 'Apache Software License',
        'Artistic License/GPL': 'Artistic License',
        'Eclipse Public License 1.0': 'Eclipse Public License',
        'GNU GPL v2': 'GNU General Public License (GPL)',
        'GNU GPL v3': 'GNU General Public License (GPL)',
        'GNU Lesser GPL': 'GNU Library or Lesser General Public License (LGPL)',
        'MIT License': 'MIT License',
        'Mozilla Public License 1.1': 'Mozilla Public License 1.1 (MPL 1.1)',
        'New BSD License': 'BSD License',
        'Other Open Source': 'Other/Proprietary License',
    })
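
    # Illustration of the defaultdict fallback above: a known Google Code
    # license name maps to its trove name, and anything else falls back to
    # the factory default.
    #
    #     LICENSE_MAP['GNU GPL v2']      # -> 'GNU General Public License (GPL)'
    #     LICENSE_MAP['Some License']    # -> 'Other/Proprietary License'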

    DEFAULT_ICON = 'http://www.gstatic.com/codesite/ph/images/defaultlogo.png'

    def __init__(self, allura_project, gc_project_name, page=None):
        self.project = allura_project
        self.gc_project_name = gc_project_name
        self._page_cache = {}
        self.url = None
        self.page = None
        if page:
            self.get_page(page)

    def get_page(self, page_name_or_url):
        """Return a BeautifulSoup object for the given page name or url.

        If a page name is provided, the associated url is looked up in
        :attr:`PAGE_MAP`.

        Results are cached so that subsequent calls for the same page name or
        url will return the cached result rather than making another HTTP
        request.
        """
        if page_name_or_url in self._page_cache:
            return self._page_cache[page_name_or_url]
        self.url = (self.get_page_url(page_name_or_url)
                    if page_name_or_url in self.PAGE_MAP
                    else page_name_or_url)
        self.page = self._page_cache[page_name_or_url] = \
            BeautifulSoup(self.urlopen(self.url))
        return self.page
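
    # A minimal usage sketch of the cache behavior above ('my-project' is a
    # hypothetical Google Code project name; urlopen is presumably provided
    # by the ProjectExtractor base class). The second call hits _page_cache,
    # so only one HTTP request is made:
    #
    #     extractor = GoogleCodeProjectExtractor(project, 'my-project')
    #     soup = extractor.get_page('project_info')
    #     assert extractor.get_page('project_info') is soup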

    def get_page_url(self, page_name):
        """Return the url associated with ``page_name``.

        Raises KeyError if ``page_name`` is not in :attr:`PAGE_MAP`.
        """
        return self.PAGE_MAP[page_name] % urllib.quote(self.gc_project_name)
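
    # For example, with a gc_project_name of 'my-project' (hypothetical):
    #
    #     extractor.get_page_url('project_info')
    #     # -> 'http://code.google.com/p/my-project/'
    #
    # urllib.quote percent-encodes any unsafe characters in the name before
    # it is substituted into the URL template.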

    def get_short_description(self):
        """Scrape the short description from the project page and set it
        on the Allura project.
        """
        page = self.get_page('project_info')
        description = page.find(itemprop='description')
        self.project.short_description = description.string.strip()

    def get_icon(self):
        """Download the project icon and attach it to the Allura project,
        unless it is the Google Code default icon.
        """
        page = self.get_page('project_info')
        icon_url = urljoin(self.url, page.find(itemprop='image').attrMap['src'])
        if icon_url == self.DEFAULT_ICON:
            return
        icon_name = urllib.unquote(urlparse(icon_url).path).split('/')[-1]
        fp_ish = self.urlopen(icon_url)
        fp = StringIO(fp_ish.read())
        M.ProjectFile.save_image(
            icon_name, fp,
            fp_ish.info()['content-type'].split(';')[0],  # strip off charset=x extra param
            square=True, thumbnail_size=(48, 48),
            thumbnail_meta={'project_id': self.project._id, 'category': 'icon'})
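
    # Hypothetical illustration of the icon_name extraction above: for an
    # icon_url of 'http://example.com/images/my%20logo.png?r=2', urlparse
    # keeps only the path, unquote decodes the percent-escapes, and the
    # final path segment becomes the filename:
    #
    #     urllib.unquote(urlparse(icon_url).path).split('/')[-1]
    #     # -> 'my logo.png'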

    def get_license(self):
        """Map the project's code license to a trove category and add it
        to the Allura project.
        """
        page = self.get_page('project_info')
        license = page.find(text='Code license').findNext().find('a').string.strip()
        trove = M.TroveCategory.query.get(fullname=self.LICENSE_MAP[license])
        self.project.trove_license.append(trove._id)
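
    # The scraping above assumes project-page markup along these lines (a
    # simplified, hypothetical example):
    #
    #     <th>Code license</th>
    #     <td><a href="...">New BSD License</a></td>
    #
    # find(text='Code license') locates the label text, findNext() advances
    # to the following element, and find('a').string yields the license
    # name, which LICENSE_MAP translates to a trove fullname ('BSD License'
    # in this case).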

    def get_repo_type(self):
        """Return the repo type ('svn', 'hg', or 'git') detected from the
        project's source browse page.
        """
        page = self.get_page('source_browse')
        repo_type = page.find(id="crumb_root")
        if not repo_type:
            raise Exception("Couldn't detect repo type: no #crumb_root in "
                            "{0}".format(self.url))
        re_match = self.RE_REPO_TYPE.match(repo_type.text.lower())
        if re_match:
            return re_match.group(0)
        else:
            raise Exception("Unknown repo type: {0}".format(repo_type.text))