blob: d001c37240cc344f3ae5ba38127d91b779cc8d5d [file] [log] [blame]
package com.atlassian.uwc.exporters;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.PropertyConfigurator;
/**
* exports pages from a Mediawiki database to text files.
* Requires a properties file. See sample properties file exporter.mediawiki.properties
* @author Laura Kolker
*
*/
public class MediaWikiExporter extends SQLExporter {
// Fallback charset used for page titles/bodies when no 'encoding' property is set.
private static final String DEFAULT_ENCODING = "utf-8";
//CONSTANTS
//properties constants (keys expected in the exporter properties map/file)
private static final String DEFAULT_PROPERTIES_LOCATION = "exporter.mediawiki.properties";
protected static final String EXPORTER_PROPERTIES_OUTPUTDIR = "output";
private static final String EXPORTER_PROPERTIES_PASSWORD = "password";
// NOTE(review): "PROPRETIES" is a typo in the constant name, but the name is
// referenced throughout this file; the property key itself ("login") is correct.
private static final String EXPORTER_PROPRETIES_LOGIN = "login";
private static final String EXPORTER_PROPERTIES_DRIVER = "jdbc.driver.class";
private static final String EXPORTER_PROPERTIES_DBURL = "dbUrl";
private static final String EXPORTER_PROPERTIES_DBNAME = "databaseName";
private static final String EXPORTER_PROPERTIES_DBPREFIX = "dbPrefix";
private static final String EXPORTER_PROPERTIES_ENCODING = "encoding";
private static final String EXPORTER_PROPERTIES_URLENCODING = "urlencoding";
private static final String EXPORTER_PROPERTIES_HISTORY = "history";
private static final String EXPORTER_PROPERTIES_HISTORYSUFFIX = "history-suffix";
private static final String EXPORTER_PROPERTIES_UDMF = "udmf";
private static final String EXPORTER_PROPERTIES_ORIGTITLE = "origtitle";
//mediawiki database constants (accurate for Mediawiki 1.7.1. Are these different for other mediawikis?)
private static final String PAGE_TABLE = "page";
private static final String REV_TABLE = "revision";
private static final String TEXT_TABLE = "text";
private static final String COL_ID = "page_id";
private static final String COL_LATEST = "page_latest";
private static final String COL_TITLE = "page_title";
private static final String COL_NAMESPACE = "page_namespace";
private static final String COL_REV = "rev_id";
private static final String COL_REV_TEXT = "rev_text_id";
private static final String COL_REV_PAGE = "rev_page";
private static final String COL_REV_USER = "rev_user_text";
private static final String COL_REV_DATE = "rev_timestamp";
private static final String COL_TEXT_ID = "old_id";
private static final String COL_TEXT = "old_text";
// NOTE(review): the two namespace ids below appear unused in this file — verify
// against the rest of the codebase before removing.
private static final String NAMESPACE_INTERNAL = "12";
private static final String NAMESPACE_SPECIAL = "8";
//output directory (appended to the 'output' property by cleanOutputDir)
private static final String EXPORT_DIR = "exported_mediawiki_pages";
// connection settings, populated from the properties map in connectToDB()
private String dbName;
private String dbUrl;
private String login;
private String password;
private String output;
private String jdbcDriver;
// optional behavior settings, populated in fillOptionalProperties()
private String prefix;
private String encoding;
private String history;
private String historySuffix;
private String urlencoding;
private String udmf;
private String origtitle;
//optional sql properties (null when not configured; see existsSqlProperties)
private String optPageSql;
private String optTextIdSql;
private String optTextSql;
private String optRevSql;
private String optUdmfSql;
private String optTitleCol;
private String optTextCol;
private String optPageIdCol;
private String optNamespaceCol;
private String optTextIdCol;
//namespace properties
private String nsPropIds;
private String nsPropExportAllCustom;
private String nsPropCustomMap;
// lazily built from nsPropCustomMap; see getNamespaceCustomMap()
private HashMap<Integer, String> nsCustomMap;
//Descriptive names for mediawiki namespaces that use numbers in the database
String[] namespaces = {"Pages", "Discussions", "Users", "UserDiscussions"};
/**
* entry method if we use this class as an App.
* properties must be in exporter.mediawiki.properties
* @param args
*/
public static void main(String[] args) {
    MediaWikiExporter exporter = new MediaWikiExporter();
    // read connection settings from the default properties file
    Map properties = exporter.getDbProperties(DEFAULT_PROPERTIES_LOCATION);
    try {
        exporter.export(properties);
    } catch (ClassNotFoundException cnfe) {
        cnfe.printStackTrace();
    } catch (SQLException sqle) {
        sqle.printStackTrace();
    }
}
/**
* default properties grabber. used by main when this class is used as an app.
* @param filename path to properties file
* @return map of properties from properties file
*/
private Map getDbProperties(String filename) {
    // Keys copied from the properties file into the returned map.
    String[] keys = {
            EXPORTER_PROPERTIES_DBNAME, EXPORTER_PROPERTIES_DBURL,
            EXPORTER_PROPERTIES_DRIVER, EXPORTER_PROPRETIES_LOGIN,
            EXPORTER_PROPERTIES_PASSWORD, EXPORTER_PROPERTIES_OUTPUTDIR,
            EXPORTER_PROPERTIES_DBPREFIX };
    Properties props = new Properties();
    Map propsMap = new HashMap();
    FileInputStream in = null;
    try {
        in = new FileInputStream(filename);
        props.load(in);
        for (String key : keys) {
            propsMap.put(key, props.getProperty(key));
        }
    } catch (FileNotFoundException e) {
        log.error("Cannot find properties file");
        e.printStackTrace();
    } catch (IOException e) {
        log.error("Cannot load properties file");
        e.printStackTrace();
    } finally {
        // FIX: the stream was previously never closed (file handle leak).
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // best-effort close; nothing useful to do here
            }
        }
    }
    return propsMap;
}
/**
* exports the Mediawiki database described in the given properties
* to text files that will be written to the output directory
* @param properties Map of properties. Must contain keys: databaseName,
* dbUrl, jdbc.driver.class, login, password, output. See example file
* exporter.mediawiki.properties
* @throws SQLException
* @throws ClassNotFoundException
*/
public void export(Map properties) throws ClassNotFoundException, SQLException {
    this.running = true;
    // configure log4j before anything logs
    PropertyConfigurator.configure("log4j.properties");
    log.info("Exporting Mediawiki...");
    // connect, export, disconnect
    connectToDB(properties);
    exportMediawiki();
    closeDB();
    // only report success if nothing cleared the running flag
    if (this.running) {
        log.info("Export Complete.");
    }
    this.running = false;
}
/**
* connects to the database described by the given properties
* @param props Map of properties. See example file export.mediawiki.properties
* @throws ClassNotFoundException
* @throws SQLException
*/
private void connectToDB(Map props) throws ClassNotFoundException, SQLException {
    // required connection settings
    jdbcDriver = (String) props.get(EXPORTER_PROPERTIES_DRIVER);
    dbUrl = (String) props.get(EXPORTER_PROPERTIES_DBURL);
    dbName = (String) props.get(EXPORTER_PROPERTIES_DBNAME);
    login = (String) props.get(EXPORTER_PROPRETIES_LOGIN);
    password = (String) props.get(EXPORTER_PROPERTIES_PASSWORD);
    output = (String) props.get(EXPORTER_PROPERTIES_OUTPUTDIR);
    // optional settings (prefix, encoding, history, udmf, custom sql, ...)
    fillOptionalProperties(props);
    connectToDB(jdbcDriver, dbUrl, dbName, login, password);
}
/**
 * Reads the optional exporter settings from the properties map,
 * applying defaults where a property is absent.
 * @param props Map of properties. See exporter.mediawiki.properties
 */
private void fillOptionalProperties(Map props) {
    prefix = (String) props.get(EXPORTER_PROPERTIES_DBPREFIX);
    if (prefix == null) prefix = "";
    encoding = (String) props.get(EXPORTER_PROPERTIES_ENCODING);
    if (encoding == null) encoding = DEFAULT_ENCODING;
    urlencoding = (String) props.get(EXPORTER_PROPERTIES_URLENCODING);
    if (urlencoding == null) urlencoding = "false";
    history = (String) props.get(EXPORTER_PROPERTIES_HISTORY);
    if (history == null) history = "false";
    historySuffix = (String) props.get(EXPORTER_PROPERTIES_HISTORYSUFFIX);
    if (historySuffix == null) historySuffix = "";
    // FIX: the null-guards for udmf/origtitle previously ran BEFORE the
    // property was read, so both fields could still end up null. Read
    // first, then default — consistent with every other property above.
    udmf = (String) props.get(EXPORTER_PROPERTIES_UDMF);
    if (udmf == null) udmf = "";
    origtitle = (String) props.get(EXPORTER_PROPERTIES_ORIGTITLE);
    if (origtitle == null) origtitle = "";
    //leave opt sql props null, if unfilled
    optPageSql = (String) props.get("db.sql.pagedata");
    optTextIdSql = (String) props.get("db.sql.textiddata");
    optTextSql = (String) props.get("db.sql.textdata");
    optRevSql = (String) props.get("db.sql.revdata");
    optUdmfSql = (String) props.get("db.sql.udmfdata");
    optTitleCol = (String) props.get("db.column.title");
    optNamespaceCol = (String) props.get("db.column.namespace");
    optPageIdCol = (String) props.get("db.column.pageid");
    optTextIdCol = (String) props.get("db.column.textid");
    optTextCol = (String) props.get("db.column.text");
    //namespace properties
    nsPropIds = (String) props.get("namespaces.ids");
    nsPropExportAllCustom = (String) props.get("namespaces.exportallcustom");
    nsPropCustomMap = (String) props.get("namespaces.customnamespace.mapping");
}
/**
* exports the mediawiki associated with the open database connection 'con'
* @throws SQLException if an error occurs while executing an sql command
*/
private void exportMediawiki() throws SQLException {
    if (!this.running) return;
    //prepare output directory
    cleanOutputDir();
    // FIX: removed the unused local 'Vector pages = null;'.
    // Use the custom SQL variant when the optional sql properties are all
    // configured; otherwise fall back to the built-in schema queries.
    if (existsSqlProperties()) {
        getMediaWikiPages(
                optPageSql, optTextSql, optTitleCol, optTextCol,
                optNamespaceCol, optPageIdCol, optTextIdCol);
    }
    else {
        getMediaWikiPages();
    }
}
/**
* deletes and recreates the output directory
*/
protected void cleanOutputDir() {
if (!this.running) return;
// NOTE(review): EXPORT_DIR is appended with no File.separator — the 'output'
// property must end with a separator or the directory name gets fused to it.
// Also, 'output' is a field, so calling this method twice appends EXPORT_DIR
// twice. Both look like latent bugs — confirm before changing, since the
// resulting path is part of the tool's observable behavior.
output = output + EXPORT_DIR;
File file = new File(output);
if (!file.exists()) {
log.info("Creating output directory: " + output);
file.mkdir();
}
else {
// wipe any previous export so stale pages don't linger
deleteDir(file);
file.mkdir();
log.info("Cleaning and creating output directory:" + output);
}
}
/**
* deletes the given file. This method is used recursively.
* @param file can be a directory or a file. Directory does not have to be empty.
*/
/**
 * deletes the given file or directory recursively.
 * @param file can be a directory or a file. Directory does not have to be empty.
 */
private void deleteDir(File file) {
    //if file doesn't exist (shouldn't happen), just exit
    if (!file.exists()) return;
    String name = "";
    try {
        name = file.getCanonicalPath();
    } catch (IOException e) {
        log.error("Problem while deleting directory. No filename!");
        e.printStackTrace();
    }
    //delete the file
    if (file.delete()) {
        log.debug("deleting " + name);
        return;
    }
    else { // or delete the directory
        // FIX: listFiles() returns null when 'file' is not a directory (e.g.
        // a plain file whose delete failed) or on an I/O error; the previous
        // code would then NPE in the for-loop.
        File[] files = file.listFiles();
        if (files != null) {
            for (File f : files) {
                deleteDir(f);
            }
        }
        file.delete();
        log.debug("deleting dir: " + name);
    }
}
/**
* @return true if enough optional sql properties have been set
*/
private boolean existsSqlProperties() {
    // All of these must be configured for the custom-SQL export path.
    String[] required = {
            optPageSql, optTextSql, optTextIdSql, optTitleCol,
            optTextCol, optPageIdCol, optNamespaceCol, optTextIdCol };
    for (String setting : required) {
        if (setting == null) return false;
    }
    return true;
}
/**
* get all the interesting mediawiki pages (not the Special ones)
* from the database
* @return Vector of MediaWikiPage objects containing titles and text, etc.
* @throws SQLException if an error occurs when executing the sql command
*/
private void getMediaWikiPages() throws SQLException {
ResultSet pagedata = null;
try {
//get pages in the selected namespaces (see getNamespaceWhereClause)
// NOTE(review): SQL is built by string concatenation. The interpolated values
// come from this tool's own properties and prior query results, not end users,
// but a PreparedStatement would still be the safer idiom.
String pageSql = "select " +
COL_ID + ", " +
COL_NAMESPACE +", " +
COL_TITLE + ", " +
COL_LATEST + " " +
"from " + prefix + PAGE_TABLE + " " +
getNamespaceWhereClause() + ";";
pagedata = sql(pageSql);
try {
while (pagedata.next()) {
if (!this.running) return; // export was cancelled
// page data
String id = pagedata.getString(COL_ID);
String latest = pagedata.getString(COL_LATEST);
String namespace = pagedata.getString(COL_NAMESPACE);
byte[] bytes2 = pagedata.getBytes(COL_TITLE); //get bytes, 'cause we might have unicode issues
String title = null;
try {
title = getTitle(bytes2);
} catch (UnsupportedEncodingException e1) {
// bad 'encoding' property — title stays null for this page
e1.printStackTrace();
}
//get all the revision ids we need
Vector<String> allRevs = new Vector<String>();
if (gettingHistory()) {
allRevs = getAllRevIds(id); //handle histories
}
else {
allRevs.add(latest); //just the latest revision id
}
//user timestamp data
HashMap<String,String[]>revUdmfMap = null;
String udmfSql = "select " + COL_REV_USER + "," + COL_REV_DATE + "," + COL_REV +
" from " + prefix + REV_TABLE +
" where " + COL_REV_PAGE + "='" + id + "';";
if (gettingUserdate()) {
revUdmfMap = getUserDateMap(udmfSql); //rev_id -> [username,timestamp]
}
int numRevs = 1;
for (String rev : allRevs) {
//get the text id
String textIdSql = "select " + COL_REV_TEXT + " from " + prefix + REV_TABLE +
" where " + COL_REV + "='" + rev + "';";
ResultSet revdata = sql(textIdSql);
String textid = "";
while (revdata.next()) {
textid = revdata.getString(COL_REV_TEXT);
}
//get the text
String textSql = "select " + COL_TEXT + " from " + prefix + TEXT_TABLE +
" where " + COL_TEXT_ID + "='" + textid + "';";
ResultSet textdata = sql(textSql);
String text = "";
while (textdata.next() ) {
if (!this.running) return; // NOTE(review): early return leaks revdata/textdata
byte[] bytes = textdata.getBytes(COL_TEXT);
try {
text = new String(bytes, encoding);
} catch (UnsupportedEncodingException e) {
// bad 'encoding' property — page body stays empty
e.printStackTrace();
}
}
if (gettingUserdate()) { //date for udmf framework: usernames and timestamps
if (!this.running) return;
String userdate = getUserDateData(rev, revUdmfMap);
text = userdate + text;
}
//save the data into a local object
MediaWikiPage mwpage = new MediaWikiPage(title, text, namespace, id, (numRevs++)+"");
//next: 1) handle URL decoding when converting, 2) handle other getMEdiawikiPages method, 3)refactor
//next: refactor you can use the jdb URL to set the UTF-8 encoding?
//output the file to the system
createFileLocally(mwpage);
revdata.close();
textdata.close();
}
}
// NOTE(review): this catch swallows SQLExceptions (logs only), so a failing
// query still lets export() report "Export Complete."
} catch (SQLException e) {
log.error("Problem while examining data.");
e.printStackTrace();
}
// NOTE(review): if sql(pageSql) itself throws, 'pagedata' is still null here
// and this close() raises a NullPointerException that masks the real error.
} finally {
pagedata.close();
}
}
/**
 * Builds the SQL where-clause selecting which namespaces to export,
 * from the namespaces.exportallcustom and namespaces.ids properties.
 * @return " where ..." clause, or "" when no condition applies
 */
protected String getNamespaceWhereClause() {
    // custom namespaces (ids >= 100) are exported unless disabled by property
    String whereAllCustom = getExportAllCustomNamespaceProperty()?
            COL_NAMESPACE + ">=100":
            "";
    String nsIds = getNamespaceIdsProperty();
    String whereNSIds = "";
    for (String id : nsIds.split(",")) {
        // FIX: the " or " separator used to be appended before checking the
        // token, so a non-numeric entry between two numeric ids produced
        // malformed SQL like "page_namespace=1 or  or page_namespace=2".
        // Only append a separator when the token is actually used.
        if (id.matches("\\d+")) {
            if (!"".equals(whereNSIds)) whereNSIds += " or ";
            whereNSIds += COL_NAMESPACE + "=" + id;
        }
    }
    if ("".equals(whereNSIds)) {
        whereNSIds = "page_namespace=0 or page_namespace=2"; //default namespaces are main and user
    }
    String where = "";
    if (!"".equals(whereAllCustom) && !"".equals(whereNSIds))
        where = whereAllCustom + " or " + whereNSIds;
    else where = whereAllCustom + whereNSIds;
    if (!"".equals(where)) where = " where " + where;
    return where;
}
private boolean getExportAllCustomNamespaceProperty() {
    // Export custom namespaces by default; only an explicit true/false
    // (case-insensitive) in the property overrides that.
    boolean fallback = true;
    if (nsPropExportAllCustom == null || "".equals(nsPropExportAllCustom)) {
        return fallback;
    }
    String value = nsPropExportAllCustom.trim();
    if (value.matches("(?i)false")) return false;
    if (value.matches("(?i)true")) return true;
    return fallback;
}
private String getNamespaceIdsProperty() {
    // never return null; an empty string means "use the default namespaces"
    return (nsPropIds == null) ? "" : nsPropIds;
}
// Extracts the first selected column name from a custom "select" statement,
// e.g. "select rev_id from ..." -> group(1) = "rev_id"
Pattern firstCol = Pattern.compile("^(?i)select\\s*(\\w*).*$");
// Extracts the full comma-separated column list between "select" and "from"
Pattern allCols = Pattern.compile("^(?i)select\\s*(.*) from.*$");
/**
* get all the mediawiki pages from the database using the optional property SQL.
* @return Vector of MediaWikiPage objects containing titles and text, etc.
* @throws SQLException if an error occurs while executing the SQL command
*/
private void getMediaWikiPages(
String pageSql,
String textSql,
String titleColumn,
String textColumn,
String namespaceColumn,
String pageIdColumn,
String textIdColumn) throws SQLException {
// 'message' tracks the sql (or context) currently being run, for error logging
String message = null;
ResultSet pageData, textData, textIdData;
pageData = textData = textIdData = null;
try {
message = pageSql;
pageData = sql(pageSql);
while (pageData.next()) {
if (!this.running) return; // export was cancelled
//get the relevant strings
String latest = pageData.getString(textIdColumn);
String namespace = pageData.getString(namespaceColumn);
String id = pageData.getString(pageIdColumn);
byte[] bytes2 = pageData.getBytes(titleColumn); //get bytes, 'cause we might have unicode issues
String title = null;
try {
title = getTitle(bytes2);
} catch (UnsupportedEncodingException e1) {
// bad 'encoding' property — title stays null for this page
e1.printStackTrace();
}
//replace references to page props with real data
// NOTE(review): replaceAll takes a regex, so the '.' in "db.column.pageid"
// matches any character — fine for the intended placeholder, but broader
// than a literal match. Pattern.quote(...) would be exact.
String textSqlAdj = textSql.replaceAll("db.column.pageid", id);
textSqlAdj = textSqlAdj.replaceAll("db.column.title", title);
textSqlAdj = textSqlAdj.replaceAll("db.column.namespace", namespace);
//handle histories
Vector<String> allRevs = new Vector<String>();
if (gettingHistory()) {
if (optRevSql != null && !"".equals(optRevSql)) {
String revsql = optRevSql.replaceAll("db.column.pageid", id);
Matcher colFinder = firstCol.matcher(revsql);
if (colFinder.find()) {
String col = colFinder.group(1); //select SOMECOLUMN
allRevs = getAllRevIds(revsql, col);
}
else {
log.warn("Couldn't find return column. Using default revsql.");
allRevs = getAllRevIds(id);
}
}
else allRevs = getAllRevIds(id); //no optional rev sql
}
else {
allRevs.add(latest); //just the latest one
}
//handle user date data (udmf)
//user timestamp data
String defaultUdmfSql = "select " + COL_REV_USER + "," + COL_REV_DATE + "," + COL_REV +
" from " + prefix + REV_TABLE +
" where " + COL_REV_PAGE + "='" + id + "';";
HashMap<String,String[]>revUdmfMap = null;
if (gettingUserdate()) {
if (optUdmfSql != null && !"".equals(optUdmfSql)) {
String udmfSql = optUdmfSql.replaceAll("db.column.pageid", id);
Matcher colFinder = allCols.matcher(udmfSql);
if (colFinder.find()) {
String[] cols = colFinder.group(1).split(",");
revUdmfMap = getUserDateMap(udmfSql, cols); //rev_id -> [username,timestamp]
}
else {
log.warn("Couldn't find return columns. Using default revsql.");
revUdmfMap = getUserDateMap(defaultUdmfSql); //rev_id -> [username,timestamp]
}
}
else revUdmfMap = getUserDateMap(defaultUdmfSql); //no optional rev sql
}
int numRevs = 1;
// keep a pristine copy: the textid placeholder is re-substituted per revision
String textSqlRepeater = textSqlAdj;
for (String rev : allRevs) {
//get text id
String textIdSql = optTextIdSql.replaceAll("db.column.textid", rev);
message = textIdSql;
textIdData = sql(textIdSql);
String textid = "";
while (textIdData.next()) {
if (!this.running) return;
textid = textIdData.getString(1); //get first column result
}
//get text
textSqlAdj = textSqlRepeater.replaceAll("db.column.textid", textid);
message = textSqlAdj;
textData = sql(textSqlAdj);
String text = "";
while (textData.next() ) {
if (!this.running) return;
byte[] bytes = textData.getBytes(COL_TEXT);
try {
text = new String(bytes, encoding);
} catch (UnsupportedEncodingException e) {
// bad 'encoding' property — page body stays empty
e.printStackTrace();
}
}
if (gettingUserdate()) { //date for udmf framework: usernames and timestamps
if (!this.running) return;
String userdate = getUserDateData(rev, revUdmfMap);
text = userdate + text;
}
// NOTE(review): the 'message' set here is never logged — the exception below
// is an IllegalArgumentException, but only SQLException is caught/logged.
if (title == null || text == null || id == null) {
message = "title, text, or id is null. Check optional sql properties.";
throw new IllegalArgumentException();
}
//save the data into a local object
MediaWikiPage mwpage = new MediaWikiPage(title, text, namespace, id, (numRevs++)+"");
//output the file to the system
createFileLocally(mwpage);
textData.close();
textIdData.close();
}
}
} catch (SQLException e) {
log.error("Problem while running custom SQL: " + message);
throw e;
// NOTE(review): if sql(pageSql) throws, 'pageData' is still null here and this
// close() raises a NullPointerException that masks the rethrown SQLException.
} finally {
pageData.close();
}
}
/**
 * Decodes a raw title from the database, optionally URL-encoding it
 * so it is safe to use as a directory/file name.
 * @param rawbytes raw title bytes from the page table
 * @return decoded (and possibly URL-encoded) title
 * @throws UnsupportedEncodingException if the configured encoding is invalid
 */
protected String getTitle(byte[] rawbytes) throws UnsupportedEncodingException {
    if (encoding == null) {
        encoding = DEFAULT_ENCODING;
    }
    String title = new String (rawbytes, encoding); //enforce utf-8 encoding
    boolean urlEncode = this.urlencoding != null && Boolean.parseBoolean(this.urlencoding);
    if (urlEncode) {
        return URLEncoder.encode(title, encoding); //make directory safe
    }
    return title;
}
/**
* creates files in the designated output directory (see output field)
* Text pages are created in Namespace directories below the output directory.
* @param pages MediaWikiPage Vector.
*/
private void createFilesLocally(Vector pages) {
    for (Object next : pages) {
        // bail out as soon as the export is cancelled
        if (!this.running) return;
        createFileLocally((MediaWikiPage) next);
    }
}
/**
* creates one file for the given MediaWikiPage object
* @param page
*/
protected void createFileLocally(MediaWikiPage page) {
    if (!this.running) return;
    // history export encodes the revision number into the filename
    String filename;
    if (gettingHistory()) {
        filename = createFilename(page.title, page.namespace, page.versionId);
    } else {
        filename = createFilename(page.title, page.namespace);
    }
    if (gettingOrigTitle()) {
        // append an {orig-title} macro so the converter can recover the title
        log.debug("Adding original title to content: " + page.title);
        page.text += "\n" + "{orig-title:" + page.title +"}\n";
    }
    createFileLocally(filename, getParent(page.namespace), page.text);
}
/**
* creates the filename based on the page title.
* converts ":" characters to "__" characters.
* @param title String, example: ABC or Help:Abc
* @return filename, string. example: Abc.txt or Help__Abc.txt
*/
protected String createFilename(String title, String namespace) {
    // no version id: delegate to the three-argument variant
    return createFilename(title, namespace, null);
}
/**
* creates the filename based on the page title.
* converts ":" characters to "__" characters.
* @param title String, example: ABC or Help:Abc
* @param namespace namespace id which identifies the namespace
* @param version page revision id, used with page history export
* @return filename as string. example: Abc.txt, Help__Abc.txt, Abc-23.txt
*/
protected String createFilename(String title, String namespace, String version) {
    log.debug("getting filename from title: " + title);
    // ":" (and its url-encoded form) becomes "__"; path delimiters become "_"
    String base = title.replaceAll(":|(?:%3A)", "__");
    base = base.replaceAll("[/\\\\]", "_");
    String extension = ".txt";
    if (version != null) {
        // the history-suffix property uses '#' as the revision placeholder
        String suffix = this.historySuffix;
        if (suffix.indexOf('#') >= 0) {
            extension = suffix.replaceFirst("#", version);
        } else {
            log.warn("Couldn't find # in history-suffix. Won't be able to preserve histories.");
        }
    }
    int namespaceNum = Integer.parseInt(namespace);
    String namespaceStr = "";
    try {
        if (namespaces[namespaceNum].endsWith("Discussions")) {
            namespaceStr = "_Discussion";
        }
    } catch (ArrayIndexOutOfBoundsException e) {
        //XXX This Exception is caught and handled on purpose.
        // Unknown namespace ids are simply not reflected in the filename;
        // we only decorate names for the namespaces we know about.
    }
    String filename = base + namespaceStr + extension;
    log.debug("new filename will be: " + filename);
    return filename;
}
/**
* gets the parent directory, given the output (a field) and the namespace
* @param namespace
* @return output/namespace/
*/
private String getParent(String namespace) {
    String parent = "";
    try {
        int nsId = Integer.parseInt(namespace);
        // known ids map to descriptive directory names; higher (custom) ids
        // go through the custom-namespace mapping
        String dirName;
        if (nsId > (namespaces.length-1)) {
            dirName = getNamespaceDirName(nsId);
        } else {
            dirName = namespaces[nsId];
        }
        parent = output + File.separator + dirName + File.separator;
    } catch (ArrayIndexOutOfBoundsException e) {
        // e.g. a negative namespace id — file it under Misc
        parent = output + File.separator + "Misc" + File.separator;
    }
    log.debug("Parent directory = " + parent);
    return parent;
}
protected String getNamespaceDirName(int nsId) {
    // use the user-configured directory name when one exists,
    // otherwise fall back to the numeric id
    String custom = getNamespaceCustomMap().get(nsId);
    return (custom != null) ? custom : String.valueOf(nsId);
}
/**
 * Lazily parses namespaces.customnamespace.mapping ("id=>name,id=>name")
 * into a map of namespace id to directory name. Invalid entries are
 * logged and skipped.
 */
private HashMap<Integer, String> getNamespaceCustomMap() {
    if (this.nsCustomMap != null) return this.nsCustomMap;
    this.nsCustomMap = new HashMap<Integer, String>();
    if (this.nsPropCustomMap == null) return this.nsCustomMap;
    for (String pair : this.nsPropCustomMap.split(",")) {
        String[] parts = pair.split("=>");
        if (parts.length != 2) {
            log.error("Invalid property: namespaces.customnamespace.mapping");
            continue;
        }
        String key = parts[0];
        if (!key.matches("\\d+")) {
            log.error("Invalid property: namespaces.customnamespace.mapping");
            continue;
        }
        this.nsCustomMap.put(Integer.parseInt(key), parts[1]);
    }
    return this.nsCustomMap;
}
/**
* Creates the file at the given parentDir/filename, and writes the
* given filecontents to that file
* @param filename string, filename to be created. ex: Abc.txt
* @param parentDir string, parentdir filename will exist in. ex: outputdir/namespace/
* @param filecontents, string, text to be written to the newly created file
*/
private void createFileLocally(String filename, String parentDir, String filecontents) {
    File parent = new File(parentDir);
    String fullpath = parent + File.separator + filename;
    // 'message' names the step in progress so the catch can report it
    String message = "";
    try {
        message = "Checking for parent directory";
        if (!parent.exists()) {
            parent.mkdir();
        }
        log.debug(message);
        message = "Creating new file: " + fullpath;
        new File(fullpath).createNewFile();
        log.debug(message);
        message = "Sending text to new file: " + fullpath;
        writeFile(fullpath, filecontents);
        log.debug(message);
    } catch (IOException e) {
        log.error("Problem while " + message);
        e.printStackTrace();
    }
}
/**
* writes the given text to the file at the given path
* @param path string, filepath where text will be written
* @param text string, text to write to filepath
*/
protected void writeFile(String path, String text) {
// delegate to the superclass writer, using the configured page encoding
writeFile(path, text, encoding);
}
/* Start History Methods */
/** @return true when the 'history' property requests full revision export. */
private boolean gettingHistory() {
    // FIX: removed a dead try/catch — Boolean.parseBoolean never throws;
    // it returns false for null and any non-"true" string.
    return Boolean.parseBoolean(this.history);
}
/**
 * Fetches every revision id for the given page using the default schema.
 * @param id page id
 * @return Vector of revision ids
 * @throws SQLException on query failure
 */
private Vector<String> getAllRevIds(String id) throws SQLException {
    //rev ids (not rev text ids)
    String sql = "select " + COL_REV
            + " from " + prefix + REV_TABLE
            + " where " + COL_REV_PAGE + "=" + id;
    return getAllRevIds(sql, COL_REV);
}
/**
*
* @param sql SQL statement that will be run to get all the revision ids for
* a given page id.
* @param col the name of the column that is associated with revision id.
* @return Vector of revision ids
* @throws SQLException
*/
/**
 *
 * @param sql SQL statement that will be run to get all the revision ids for
 * a given page id.
 * @param col the name of the column that is associated with revision id.
 * @return Vector of revision ids (possibly partial if the export is cancelled)
 * @throws SQLException
 */
private Vector<String> getAllRevIds(String sql, String col) throws SQLException {
    ResultSet data = null;
    try {
        data = sql(sql);
        Vector<String> all = new Vector<String>();
        while (data.next()) {
            // FIX: previously returned null on cancel, which made callers'
            // for-each loops NPE; return what was collected so far instead.
            if (!this.running) return all;
            String rev = data.getString(col);
            all.add(rev);
        }
        return all;
    }
    finally {
        // FIX: if sql(sql) throws, 'data' is still null here; the old
        // unconditional close() turned the SQLException into an NPE.
        if (data != null) data.close();
    }
}
/* End History Methods */
/* Start User Date (udmf) Methods */
/** @return true when the 'udmf' property requests user/date metadata export. */
private boolean gettingUserdate() {
    // FIX: removed a dead try/catch — Boolean.parseBoolean never throws;
    // it returns false for null and any non-"true" string.
    return Boolean.parseBoolean(this.udmf);
}
private HashMap<String, String[]> getUserDateMap(String sql) throws SQLException {
    // default column set: revision id, author name, timestamp
    return getUserDateMap(sql, new String[] {COL_REV, COL_REV_USER, COL_REV_DATE});
}
/**
 * Runs the given query and builds a map of revision id to
 * {username, timestamp}, for the udmf (user date metadata) framework.
 * @param sql query returning revision id, user and timestamp columns
 * @param returncols column names, in order: rev id, user, timestamp
 * @throws SQLException on query failure
 */
private HashMap<String, String[]> getUserDateMap(String sql, String[] returncols) throws SQLException {
    HashMap<String, String[]> map = new HashMap<String, String[]>();
    ResultSet data = null;
    try {
        data = sql(sql);
        String user = "", date = "", rev = "";
        while (data.next()) {
            rev = data.getString(returncols[0].trim());
            // read the username as bytes to handle non-ascii characters
            byte[] userbytes = data.getBytes(returncols[1].trim());
            try {
                user = new String(userbytes, encoding);
            } catch (UnsupportedEncodingException e) {
                log.warn("Problem with encoding: " + encoding);
                e.printStackTrace();
                user = data.getString(returncols[1].trim());
            }
            date = data.getString(returncols[2].trim());
            String[] val = {user,date};
            map.put(rev, val);
        }
    } finally {
        // FIX: if sql(sql) throws, 'data' is still null here; the old
        // unconditional close() turned the SQLException into an NPE.
        if (data != null) data.close();
    }
    return map;
}
/**
 * Renders the {user:} and {timestamp:} macros for one revision.
 * @param rev revision id
 * @param revUdmfMap rev id -> {username, timestamp}
 * @return macro text, or "" when the revision is unknown
 */
private String getUserDateData(String rev, HashMap<String, String[]> revUdmfMap) {
    String[] data = revUdmfMap.get(rev);
    if (data == null) return "";
    StringBuilder userdate = new StringBuilder();
    if (!"".equals(data[0])) {
        userdate.append("{user:").append(data[0]).append("}\n");
    }
    userdate.append("{timestamp:").append(data[1]).append("}\n");
    return userdate.toString();
}
/* End User Date (udmf) Methods */
/* Orig Title Methods */
/** @return true when the 'origtitle' property requests {orig-title} macros. */
private boolean gettingOrigTitle() {
    // FIX: removed a dead try/catch — Boolean.parseBoolean never throws;
    // it returns false for null and any non-"true" string.
    return Boolean.parseBoolean(this.origtitle);
}
/* Start Setters/Getters */
/**
 * Overrides the page text encoding (useful for junit).
 * @param encoding charset name, e.g. "utf-8"
 */
protected void setEncoding(String encoding) {
    this.encoding = encoding;
}
/**
 * Overrides the urlencoding flag (useful for junit).
 * @param urlencoding true/false
 */
protected void setUrlEncoding(String urlencoding) {
    this.urlencoding = urlencoding;
}
/**
* simple class to hold page data we might need when outputing the file
* @author Laura Kolker
*/
private class MediaWikiPage {
// NOTE: fields are intentionally public and mutable — createFileLocally(page)
// appends the {orig-title} macro directly to 'text'.
public String title;      // decoded page title
public String text;       // page body (may be prefixed with udmf macros)
public String namespace;  // numeric namespace id, as a string
public String id;         // page id
public String versionId;  // 1-based revision counter, used in history filenames
MediaWikiPage (String title, String text, String namespace, String id, String versionId) {
this.title = title;
this.text = text;
this.namespace = namespace;
this.id = id;
this.versionId = versionId;
}
}
}