[#6767] Use .text instead of .string on BeautifulSoup objects
If the element contains no text, .text returns u'' whereas .string
returns None, so calling .strip() on the result raises AttributeError.
Signed-off-by: Tim Van Steenburgh <tvansteenburgh@gmail.com>
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index 725193f..3313ee8 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -110,7 +110,7 @@
def get_short_description(self, project):
page = self.get_page('project_info')
- project.short_description = page.find(itemprop='description').string.strip()
+ project.short_description = page.find(itemprop='description').text.strip()
def get_icon(self, project):
page = self.get_page('project_info')
@@ -126,7 +126,7 @@
def get_license(self, project):
page = self.get_page('project_info')
- license = page.find(text='Code license').findNext().find('a').string.strip()
+ license = page.find(text='Code license').findNext().find('a').text.strip()
trove = M.TroveCategory.query.get(fullname=self.LICENSE_MAP[license])
project.trove_license.append(trove._id)
@@ -168,9 +168,9 @@
extractor.get_page('issues_csv', parser=csv_parser, start=start)
def get_issue_summary(self):
- text = self.page.find(id='issueheader').findAll('td', limit=2)[1].span.string.strip()
+ text = self.page.find(id='issueheader').findAll('td', limit=2)[1].span.text.strip()
bs = BeautifulSoup(text, convertEntities=BeautifulSoup.HTML_ENTITIES)
- return bs.string
+ return bs.text
def get_issue_description(self):
return _as_text(self.page.find(id='hc0').pre).strip()
@@ -193,7 +193,7 @@
def get_issue_status(self):
tag = self.page.find(id='issuemeta').find('th', text=re.compile('Status:')).findNext().span
if tag:
- return tag.string.strip()
+ return tag.text.strip()
else:
return ''
@@ -224,7 +224,7 @@
class UserLink(object):
def __init__(self, tag):
- self.name = tag.string.strip()
+ self.name = tag.text.strip()
if tag.get('href'):
self.url = urljoin(GoogleCodeProjectExtractor.BASE_URL, tag.get('href'))
else:
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 4eff44f..2fe82fa 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -76,7 +76,7 @@
def test_get_short_description(self):
extractor = google.GoogleCodeProjectExtractor('my-project', 'project_info')
- extractor.page.find.return_value.string = 'My Super Project'
+ extractor.page.find.return_value.text = 'My Super Project'
extractor.get_short_description(self.project)
@@ -104,7 +104,7 @@
def test_get_license(self, M):
self.project.trove_license = []
extractor = google.GoogleCodeProjectExtractor('my-project', 'project_info')
- extractor.page.find.return_value.findNext.return_value.find.return_value.string = ' New BSD License '
+ extractor.page.find.return_value.findNext.return_value.find.return_value.text = ' New BSD License '
trove = M.TroveCategory.query.get.return_value
extractor.get_license(self.project)
@@ -116,7 +116,7 @@
M.TroveCategory.query.get.assert_called_once_with(fullname='BSD License')
M.TroveCategory.query.get.reset_mock()
- extractor.page.find.return_value.findNext.return_value.find.return_value.string = 'non-existant license'
+ extractor.page.find.return_value.findNext.return_value.find.return_value.text = 'non-existant license'
extractor.get_license(self.project)
M.TroveCategory.query.get.assert_called_once_with(fullname='Other/Proprietary License')
@@ -191,6 +191,24 @@
self.assertEqual(gpe.get_issue_created_date(), 'Thu Aug 8 15:33:52 2013')
self.assertEqual(gpe.get_issue_stars(), 1)
+ def test_get_issue_summary(self):
+ html = u"""
+ <div id="issueheader">
+ <table>
+ <tbody>
+ <tr>
+ <td></td>
+ <td><span>%s</span></td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ """
+ gpe = self._make_extractor(html % u'')
+ self.assertEqual(gpe.get_issue_summary(), u'')
+ gpe = self._make_extractor(html % u'My Summary')
+ self.assertEqual(gpe.get_issue_summary(), u'My Summary')
+
def test_get_issue_mod_date(self):
test_issue = open(pkg_resources.resource_filename('forgeimporters', 'tests/data/google/test-issue.html')).read()
gpe = self._make_extractor(test_issue)
@@ -273,14 +291,14 @@
class TestUserLink(TestCase):
def test_plain(self):
tag = mock.Mock()
- tag.string.strip.return_value = 'name'
+ tag.text.strip.return_value = 'name'
tag.get.return_value = None
link = google.UserLink(tag)
self.assertEqual(str(link), 'name')
def test_linked(self):
tag = mock.Mock()
- tag.string.strip.return_value = 'name'
+ tag.text.strip.return_value = 'name'
tag.get.return_value = '/p/project'
link = google.UserLink(tag)
self.assertEqual(str(link), '[name](http://code.google.com/p/project)')