[#5870] Make wiki2markdown a ScriptTask
diff --git a/ForgeWiki/forgewiki/command/__init__.py b/ForgeWiki/forgewiki/command/__init__.py
deleted file mode 100644
index f311f7d..0000000
--- a/ForgeWiki/forgewiki/command/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from wiki2markdown import Wiki2MarkDownCommand
diff --git a/ForgeWiki/forgewiki/command/base.py b/ForgeWiki/forgewiki/command/base.py
deleted file mode 100644
index 4634e11..0000000
--- a/ForgeWiki/forgewiki/command/base.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from allura.command.base import Command
-
-class WikiCommand(Command):
- group_name = 'ForgeWiki'
diff --git a/ForgeWiki/forgewiki/command/wiki2markdown/__init__.py b/ForgeWiki/forgewiki/command/wiki2markdown/__init__.py
deleted file mode 100644
index 051ce83..0000000
--- a/ForgeWiki/forgewiki/command/wiki2markdown/__init__.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from allura.command import base as allura_base
-from allura.lib import helpers as h
-
-from forgewiki.command.base import WikiCommand
-from forgewiki.command.wiki2markdown.extractors import MySQLExtractor
-from forgewiki.command.wiki2markdown.loaders import MediawikiLoader
-
-class Wiki2MarkDownCommand(WikiCommand):
- """Import MediaWiki to Allura Wiki tool"""
- min_args = 1
- max_args = None
- summary = 'Import wiki from mediawiki-dump to allura wiki'
-
- parser = WikiCommand.standard_parser(verbose=True)
- parser.add_option('-e', '--extract-only', action='store_true',
- dest='extract',
- help='Store data from the mediawiki-dump '
- 'on the local filesystem; not load into Allura')
- parser.add_option('-l', '--load-only', action='store_true', dest='load',
- help='Load into Allura previously-extracted data')
- parser.add_option('-d', '--dump-dir', dest='dump_dir', default='',
- help='Directory for dump files')
- parser.add_option('-n', '--neighborhood', dest='nbhd', default='',
- help='Neighborhood name to load data')
- parser.add_option('-p', '--project', dest='project', default='',
- help='Project shortname to load data into')
- parser.add_option('-a', '--attachments-dir', dest='attachments_dir',
- help='Path to directory with mediawiki attachments dump',
- default='')
-
- parser.add_option('--db_config_prefix', dest='db_config_prefix',
- help='Key prefix (e.g. "legacy.") in ini file to use instead of commandline db params')
-
- parser.add_option('-s', '--source', dest='source', default='',
- help='Database type to extract from (only mysql for now)')
- parser.add_option('--db_name', dest='db_name', default='mediawiki',
- help='Database name')
- parser.add_option('--host', dest='host', default='localhost',
- help='Database host')
- parser.add_option('--port', dest='port', type='int', default=0,
- help='Database port')
- parser.add_option('--user', dest='user', default='',
- help='User for database connection')
- parser.add_option('--password', dest='password', default='',
- help='Password for database connection')
-
-
- def command(self):
- self.basic_setup()
- self.handle_options()
-
- if self.options.extract:
- self.extractor.extract()
- if self.options.load:
- self.loader = MediawikiLoader(self.options)
- self.loader.load()
-
- def handle_options(self):
- if not self.options.dump_dir:
- allura_base.log.error('You must specify directory for dump files')
- exit(2)
-
- if not self.options.extract and not self.options.load:
- # if action doesn't specified - do both
- self.options.extract = True
- self.options.load = True
-
- if self.options.load and (not self.options.project
- or not self.options.nbhd):
- allura_base.log.error('You must specify neighborhood and project '
- 'to load data')
- exit(2)
-
- if self.options.extract:
- if self.options.db_config_prefix:
- for k, v in h.config_with_prefix(self.config, self.options.db_config_prefix).iteritems():
- if k == 'port':
- v = int(v)
- setattr(self.options, k, v)
-
- if self.options.source == 'mysql':
- self.extractor = MySQLExtractor(self.options)
- elif self.options.source in ('sqlite', 'postgres', 'sql-dump'):
- allura_base.log.error('This source not implemented yet.'
- 'Only mysql for now')
- exit(2)
- else:
- allura_base.log.error('You must specify valid data source')
- exit(2)
-
- if not self.options.attachments_dir:
- allura_base.log.error('You must specify path to directory '
- 'with mediawiki attachmets dump.')
- exit(2)
diff --git a/ForgeWiki/forgewiki/scripts/__init__.py b/ForgeWiki/forgewiki/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/__init__.py
diff --git a/ForgeWiki/forgewiki/scripts/wiki2markdown/__init__.py b/ForgeWiki/forgewiki/scripts/wiki2markdown/__init__.py
new file mode 100644
index 0000000..2fe4b24
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/wiki2markdown/__init__.py
@@ -0,0 +1 @@
+from wiki2markdown import Wiki2Markdown
diff --git a/ForgeWiki/forgewiki/command/wiki2markdown/extractors.py b/ForgeWiki/forgewiki/scripts/wiki2markdown/extractors.py
similarity index 86%
rename from ForgeWiki/forgewiki/command/wiki2markdown/extractors.py
rename to ForgeWiki/forgewiki/scripts/wiki2markdown/extractors.py
index 17c1940..8d95317 100644
--- a/ForgeWiki/forgewiki/command/wiki2markdown/extractors.py
+++ b/ForgeWiki/forgewiki/scripts/wiki2markdown/extractors.py
@@ -1,9 +1,10 @@
+import logging
import os
import shutil
import json
import hashlib
-from allura.command import base as allura_base
+log = logging.getLogger(__name__)
class MediawikiExtractor(object):
@@ -45,11 +46,7 @@
raise ImportError('GPL library MySQL-python is required for this operation')
if not self._connection:
- try:
- self._connection = MySQLdb.connect(**self.db_options)
- except MySQLdb.DatabaseError, e:
- allura_base.log.error("Can't connect to database: %s" % str(e))
- exit(2)
+ self._connection = MySQLdb.connect(**self.db_options)
return self._connection
def _save(self, content, *paths):
@@ -141,12 +138,12 @@
self.extract_pages()
def extract_pages(self):
- allura_base.log.info('Extracting pages...')
+ log.info('Extracting pages...')
for page in self._pages():
self.extract_history(page)
self.extract_talk(page)
self.extract_attachments(page)
- allura_base.log.info('Extracting pages done')
+ log.info('Extracting pages done')
def extract_history(self, page):
page_id = page['page_id']
@@ -154,8 +151,7 @@
page_data.update(page)
self._save(json.dumps(page_data), 'pages', str(page_id),
'history', str(page_data['timestamp']) + '.json')
- allura_base.log.info('Extracted history for page %s (%s)'
- % (page_id, page['title']))
+ log.info('Extracted history for page %s (%s)', page_id, page['title'])
def extract_talk(self, page):
page_id = page['page_id']
@@ -163,16 +159,13 @@
if talk_page_data:
self._save(json.dumps(talk_page_data), 'pages', str(page_id),
'discussion.json')
- allura_base.log.info('Extracted talk for page %s (%s)'
- % (page_id, page['title']))
-
- allura_base.log.info('No talk for page %s (%s)'
- % (page_id, page['title']))
+ log.info('Extracted talk for page %s (%s)', page_id, page['title'])
+ else:
+ log.info('No talk for page %s (%s)', page_id, page['title'])
def extract_attachments(self, page):
page_id = page['page_id']
for filepath in self._attachments(page_id):
self._save_attachment(filepath, 'pages', str(page_id),
'attachments')
- allura_base.log.info('Extracted attachments for page %s (%s)'
- % (page_id, page['title']))
+ log.info('Extracted attachments for page %s (%s)', page_id, page['title'])
diff --git a/ForgeWiki/forgewiki/command/wiki2markdown/loaders.py b/ForgeWiki/forgewiki/scripts/wiki2markdown/loaders.py
similarity index 76%
rename from ForgeWiki/forgewiki/command/wiki2markdown/loaders.py
rename to ForgeWiki/forgewiki/scripts/wiki2markdown/loaders.py
index d7a3ce4..00487a1 100644
--- a/ForgeWiki/forgewiki/command/wiki2markdown/loaders.py
+++ b/ForgeWiki/forgewiki/scripts/wiki2markdown/loaders.py
@@ -1,3 +1,4 @@
+import logging
import os
import json
import datetime
@@ -8,11 +9,12 @@
from forgewiki import model as WM
from forgewiki.converters import mediawiki2markdown
from forgewiki.converters import mediawiki_internal_links2markdown
-from allura.command import base as allura_base
from allura.lib import helpers as h
from allura.lib import utils
from allura.model.session import artifact_orm_session
+log = logging.getLogger(__name__)
+
class MediawikiLoader(object):
"""Load MediaWiki data from json to Allura wiki tool"""
@@ -22,39 +24,31 @@
self.options = options
self.nbhd = M.Neighborhood.query.get(name=options.nbhd)
if not self.nbhd:
- allura_base.log.error("Can't find neighborhood with name %s"
+ raise ValueError("Can't find neighborhood with name %s"
% options.nbhd)
- exit(2)
self.project = M.Project.query.get(shortname=options.project,
neighborhood_id=self.nbhd._id)
if not self.project:
- allura_base.log.error("Can't find project with shortname %s "
+ raise ValueError("Can't find project with shortname %s "
"and neighborhood_id %s"
% (options.project, self.nbhd._id))
- exit(2)
self.wiki = self.project.app_instance('wiki')
if not self.wiki:
- allura_base.log.error("Can't find wiki app in given project")
- exit(2)
+ raise ValueError("Can't find wiki app in given project")
h.set_context(self.project.shortname, 'wiki', neighborhood=self.nbhd)
- self.project.notifications_disabled = True
-
- def exit(self, status):
- self.project.notifications_disabled = False
- ThreadLocalORMSession.flush_all()
- ThreadLocalORMSession.close_all()
- exit(status)
def load(self):
- artifact_orm_session._get().skip_mod_date = True
- self.load_pages()
- self.project.notifications_disabled = False
- artifact_orm_session._get().skip_mod_date = False
- ThreadLocalORMSession.flush_all()
- ThreadLocalORMSession.close_all()
- allura_base.log.info('Loading wiki done')
+ try:
+ self.project.notifications_disabled = True
+ artifact_orm_session._get().skip_mod_date = True
+ self.load_pages()
+ ThreadLocalORMSession.flush_all()
+ log.info('Loading wiki done')
+ finally:
+ self.project.notifications_disabled = False
+ artifact_orm_session._get().skip_mod_date = False
def _pages(self):
"""Yield path to page dump directory for next wiki page"""
@@ -81,12 +75,11 @@
with open(fn, 'r') as pages_file:
page_data = json.load(pages_file)
except IOError, e:
- allura_base.log.error("Can't open file: %s" % str(e))
- self.exit(2)
+ log.error("Can't open file: %s", str(e))
+ raise
except ValueError, e:
- allura_base.log.error("Can't load data from file %s: %s"
- % (fn, str(e)))
- self.exit(2)
+ log.error("Can't load data from file %s: %s", fn, str(e))
+ raise
yield page_data
def _talk(self, page_dir):
@@ -98,12 +91,11 @@
with open(filename, 'r') as talk_file:
talk_data = json.load(talk_file)
except IOError, e:
- allura_base.log.error("Can't open file: %s" % str(e))
- self.exit(2)
+ log.error("Can't open file: %s", str(e))
+ raise
except ValueError, e:
- allura_base.log.error("Can't load data from file %s: %s"
- % (filename, str(e)))
- self.exit(2)
+ log.error("Can't load data from file %s: %s", filename, str(e))
+ raise
return talk_data
def _attachments(self, page_dir):
@@ -117,7 +109,7 @@
def load_pages(self):
"""Load pages with edit history from json to Allura wiki tool"""
- allura_base.log.info('Loading pages into allura...')
+ log.info('Loading pages into allura...')
for page_dir in self._pages():
for page in self._history(page_dir):
p = WM.Page.upsert(page['title'])
@@ -138,8 +130,7 @@
gl = WM.Globals.query.get(app_config_id=self.wiki.config._id)
if gl is not None:
gl.root = page['title']
- allura_base.log.info('Loaded history of page %s (%s)'
- % (page['page_id'], page['title']))
+ log.info('Loaded history of page %s (%s)', page['page_id'], page['title'])
self.load_talk(page_dir, page['title'])
self.load_attachments(page_dir, page['title'])
@@ -171,7 +162,7 @@
thread_id=thread._id,
timestamp=timestamp,
ignore_security=True)
- allura_base.log.info('Loaded talk for page %s' % page_title)
+ log.info('Loaded talk for page %s', page_title)
def load_attachments(self, page_dir, page_title):
"""Load attachments for page.
@@ -186,6 +177,6 @@
page.attach(filename, fp,
content_type=utils.guess_mime_type(filename))
except IOError, e:
- allura_base.log.error("Can't open file: %s" % str(e))
- self.exit(2)
- allura_base.log.info('Loaded attachments for page %s.' % page_title)
+ log.error("Can't open file: %s", str(e))
+ raise
+ log.info('Loaded attachments for page %s.', page_title)
diff --git a/ForgeWiki/forgewiki/scripts/wiki2markdown/wiki2markdown.py b/ForgeWiki/forgewiki/scripts/wiki2markdown/wiki2markdown.py
new file mode 100644
index 0000000..e837f45f
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/wiki2markdown/wiki2markdown.py
@@ -0,0 +1,109 @@
+import argparse
+import logging
+import shutil
+import tempfile
+
+from tg import config
+
+from allura.lib import helpers as h
+from allura.scripts import ScriptTask
+
+from forgewiki.scripts.wiki2markdown.extractors import MySQLExtractor
+from forgewiki.scripts.wiki2markdown.loaders import MediawikiLoader
+
+log = logging.getLogger(__name__)
+
+
+class Wiki2Markdown(ScriptTask):
+ """Import MediaWiki to Allura Wiki tool"""
+ @classmethod
+ def parser(cls):
+        parser = argparse.ArgumentParser(description='Import wiki from '
+                                          'mediawiki-dump to allura wiki')
+ parser.add_argument('-e', '--extract-only', action='store_true',
+ dest='extract',
+ help='Store data from the mediawiki-dump '
+ 'on the local filesystem; not load into Allura')
+ parser.add_argument('-l', '--load-only', action='store_true', dest='load',
+ help='Load into Allura previously-extracted data')
+ parser.add_argument('-d', '--dump-dir', dest='dump_dir', default='',
+ help='Directory for dump files')
+ parser.add_argument('-n', '--neighborhood', dest='nbhd', default='',
+ help='Neighborhood name to load data')
+ parser.add_argument('-p', '--project', dest='project', default='',
+ help='Project shortname to load data into')
+ parser.add_argument('-a', '--attachments-dir', dest='attachments_dir',
+ help='Path to directory with mediawiki attachments dump',
+ default='')
+ parser.add_argument('--db_config_prefix', dest='db_config_prefix',
+ help='Key prefix (e.g. "legacy.") in ini file to '
+ 'use instead of commandline db params')
+ parser.add_argument('-s', '--source', dest='source', default='mysql',
+ help='Database type to extract from (only mysql for now)')
+ parser.add_argument('--db_name', dest='db_name', default='mediawiki',
+ help='Database name')
+ parser.add_argument('--host', dest='host', default='localhost',
+ help='Database host')
+ parser.add_argument('--port', dest='port', type=int, default=0,
+ help='Database port')
+ parser.add_argument('--user', dest='user', default='',
+ help='User for database connection')
+ parser.add_argument('--password', dest='password', default='',
+ help='Password for database connection')
+ parser.add_argument('--keep-dumps', action='store_true', dest='keep_dumps',
+ help='Leave dump files on disk after run')
+ return parser
+
+ @classmethod
+ def execute(cls, options):
+ options = cls.handle_options(options)
+
+ try:
+ if options.extract:
+ MySQLExtractor(options).extract()
+ if options.load:
+ MediawikiLoader(options).load()
+ finally:
+ if not options.keep_dumps:
+ shutil.rmtree(options.dump_dir)
+
+ @classmethod
+ def handle_options(cls, options):
+ if not options.extract and not options.load:
+ # if action doesn't specified - do both
+ options.extract = True
+ options.load = True
+
+ if not options.dump_dir:
+ if options.load and not options.extract:
+ raise ValueError('You must specify directory containing dump files')
+ else:
+ options.dump_dir = tempfile.mkdtemp()
+ log.info("Writing temp files to %s", options.dump_dir)
+
+ if options.load and (not options.project or not options.nbhd):
+ raise ValueError('You must specify neighborhood and project '
+ 'to load data')
+
+ if options.extract:
+ if options.db_config_prefix:
+ for k, v in h.config_with_prefix(config, options.db_config_prefix).iteritems():
+ if k == 'port':
+ v = int(v)
+ setattr(options, k, v)
+
+ if options.source == 'mysql':
+ pass
+ elif options.source in ('sqlite', 'postgres', 'sql-dump'):
+            raise ValueError('This source is not implemented yet. Only mysql for now')
+ else:
+ raise ValueError('You must specify a valid data source')
+
+ if not options.attachments_dir:
+            raise ValueError('You must specify path to directory with mediawiki attachments dump.')
+
+ return options
+
+
+if __name__ == '__main__':
+ Wiki2Markdown.main()
diff --git a/ForgeWiki/forgewiki/tests/test_wiki2markdown.py b/ForgeWiki/forgewiki/tests/test_wiki2markdown.py
index 7aa9a03..f647bde 100644
--- a/ForgeWiki/forgewiki/tests/test_wiki2markdown.py
+++ b/ForgeWiki/forgewiki/tests/test_wiki2markdown.py
@@ -3,8 +3,8 @@
from datetime import datetime
from IPython.testing.decorators import module_not_available, skipif
-from forgewiki.command.wiki2markdown.extractors import MySQLExtractor
-from forgewiki.command.wiki2markdown.loaders import MediawikiLoader
+from forgewiki.scripts.wiki2markdown.extractors import MySQLExtractor
+from forgewiki.scripts.wiki2markdown.loaders import MediawikiLoader
from alluratest.controller import setup_basic_test
from allura import model as M
from forgewiki import model as WM