blob: 35317ddc49776cbb278730e3bf88a4c97c7eec4e [file] [log] [blame]
package com.atlassian.uwc.exporters;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpConnectionManager;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.log4j.Logger;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import com.atlassian.uwc.converters.tikiwiki.RegexUtil;
import com.atlassian.uwc.exporters.mindtouch.MindtouchFileIdParser;
import com.atlassian.uwc.exporters.mindtouch.MindtouchPage;
import com.atlassian.uwc.exporters.mindtouch.MindtouchPagelistParser;
public class MindtouchExporter implements Exporter {
private static final String EXT_ATT = "attachments";
private static final String EXT_SUB = "subpages";
private static final String FILECONTENT_TAGEND = "</pagedata>";
private static final String FILECONTENT_TAGSTART = "<pagedata>";
private static final String EXT_FILE = ".xml";
private static final String FILESYS_DELIM = "_";
private static final String DEFAULT_USERAGENT = "Universal Wiki Converter";
private static final String DEFAULT_BUFFERSIZE = "1024";
private static final String DEFAULT_TIMEOUT_MS = "10000";
private static final String DEFAULT_IGNOREMT = "true";
private static final String DEFAULT_PORT = "80";
private static final String EXPORT_DIR = "exported_mindtouch_pages";
public static final String PROPKEY_URLBASE = "url.base";
public static final String PROPKEY_USER = "user";
public static final String PROPKEY_PASS = "pass";
public static final String PROPKEY_URLPORT = "url.port";
public static final String PROPKEY_USERAGENT = "user-agent";
public static final String PROPKEY_OUTPUTDIR = "output.dir";
public static final String PROPKEY_BUFFERSIZE = "att.buffer";
public static final String PROPKEY_TIMEOUT = "timeout";
public static final String PROPKEY_IGNOREMT = "ignore.mindtouch";
private boolean running = false;
private Map properties;
private HttpClient client;
private int error;
Logger log = Logger.getLogger(this.getClass());
public void export(Map propertiesMap) {
log.info("Exporting Mindtouch...");
this.running = true;
setProperties(propertiesMap);
export();
this.running = false;
log.info("Export Complete.");
}
private void export() {
if (!this.running) return;
log.info("Getting page tree.");
Vector<MindtouchPage> pages = getPages();
if (pages == null) return;
log.info("Getting page content.");
pages = getContent(pages);
log.info("Getting page tags.");
pages = getTags(pages);
log.info("Getting page comments.");
pages = getComments(pages);
log.info("Getting attachment info.");
pages = getAttachments(pages);
outputPageData(pages);
}
protected Vector<MindtouchPage> getPages() {
if (!this.running) return null;
String xml = getPagelistXml();
if (xml == null) return null;
Vector<MindtouchPage> pages = parsePageXml(xml);
if (isIgnoringMindtouch()) {
pages = MindtouchPage.removeNode("MindTouch", pages);
}
return pages;
}
protected String getPagelistXml() {
if (!this.running) return null;
String baseurl = getBaseUrl();
int port = getPort();
String user = getUser();
String pass = getPass();
String apiurl = createApiUrl(baseurl);
String forceAuthentication = "?authenticate=true";
//rest api for sitemap
//http://developer.mindtouch.com/Deki/API_Reference/GET%3apages
String url = apiurl + "pages/" + forceAuthentication;
GetMethod method = new GetMethod(url);
HttpClient client = getClient();
authenticate(baseurl, port, user, pass, method, client);
try {
int result = client.executeMethod(method);
if (result != 200) {
handleError(result, method);
}
return method.getResponseBodyAsString();
} catch (Exception e) {
log.error("Problem occured when making rest request to: " + url);
e.printStackTrace();
throw new RuntimeException(e); //we need to do this to tell the UWC that an err occurred
} finally {
method.releaseConnection();
}
}
protected void handleError(int result, HttpMethod method) {
log.error("Problem encountered when making rest call: " +
result + ": " + method.getStatusLine());
this.error = result;
throw new BadResult("" + result);
}
private void authenticate(String baseurl, int port, String user, String pass, HttpMethod method, HttpClient client) {
client.getState().setCredentials(new AuthScope(baseurl, port),
new UsernamePasswordCredentials(user, pass));
method.setDoAuthentication(true);
}
protected Vector<MindtouchPage> parsePageXml(String xml) {
if (!this.running) return null;
log.debug("Parsing page list xml.");
MindtouchPagelistParser parser = new MindtouchPagelistParser();
parseXml(xml, parser);
return parser.getPages();
}
private void parseXml(String xml, DefaultHandler parser) {
if (!this.running) return;
//set up parser objects
XMLReader reader = getXmlReader();
reader.setContentHandler(parser);
reader.setErrorHandler(parser);
//parse
InputSource source = new InputSource(new StringReader(xml));
System.setProperty("http.agent", getUserAgent());
try {
reader.parse(source);
} catch (Exception e) {
String message = "Error while parsing xml. Skipping";
log.error(message);
throw new RuntimeException(e); //Skipping
}
}
/**
* @return the object that will be used to drive the parsing
*/
private XMLReader getXmlReader() {
try {
return XMLReaderFactory.createXMLReader();
} catch (SAXException e) {
String message = "Could not load XmlReader. Skipping.";
log.error(message);
e.printStackTrace();
return null;
}
}
private String getUserAgent() {
if (!this.properties.containsKey("user-agent"))
return DEFAULT_USERAGENT;
return (String) this.properties.get("user-agent");
}
protected Vector<MindtouchPage> getContent(Vector<MindtouchPage> pages) {
if (!this.running) return pages;
for (MindtouchPage page : pages) {
log.debug("...Getting content for: " + getFilename(page));
//get the content for this page
page.content = getContent(page.id);
//get the content for subpages
getContent(page.getSubpages());
}
return pages;
}
protected String getContent(String id) {
if (!this.running) return null;
String content = getPagePart(id, "contents", "format=xhtml");
if (content != null)
content = getAttachmentParents(content);
return content;
}
Pattern attachment = Pattern.compile("" +
"((?:(?:src)|(?:href))=\"[^\"]+[@]api\\/deki\\/files\\/)(\\d+)(\\/=[^\"]+)");
Pattern title = Pattern.compile("(?s)<title>(.*?)<\\/title>");
protected String getAttachmentParents(String input) {
Matcher attFinder = attachment.matcher(input);
StringBuffer sb = new StringBuffer();
boolean found = false;
while (attFinder.find()) {
found = true;
String before = attFinder.group(1);
String id = attFinder.group(2);
String after = attFinder.group(3);
String fileinfo = getFileInfo(id, "info", ""); //XXX what if fileinfo is null?
Matcher titleFinder = title.matcher(fileinfo);
if (titleFinder.find()) {
String title = titleFinder.group(1);
after += "?parent=" + title;
}
String replacement = before + id + after;
replacement = RegexUtil.handleEscapesInReplacement(replacement);
attFinder.appendReplacement(sb, replacement);
}
if (found) {
attFinder.appendTail(sb);
return sb.toString();
}
return input;
}
protected Vector<MindtouchPage> getTags(Vector<MindtouchPage> pages) {
if (!this.running) return pages;
for (MindtouchPage page : pages) {
log.debug("...Getting tags for: " + getFilename(page));
//get the content for this page
page.tags = getTags(page.id);
//get the content for subpages
getTags(page.getSubpages());
}
return pages;
}
protected String getTags(String id) {
if (!this.running) return null;
return getPagePart(id, "tags", "");
}
protected Vector<MindtouchPage> getComments(Vector<MindtouchPage> pages) {
if (!this.running) return pages;
for (MindtouchPage page : pages) {
log.debug("...Getting comments for: " + getFilename(page));
//get the content for this page
page.comments = getComments(page.id);
//get the content for subpages
getComments(page.getSubpages());
}
return pages;
}
protected String getComments(String id) {
if (!this.running) return null;
return getPagePart(id, "comments", "sortby=date.posted");
}
protected Vector<MindtouchPage> getAttachments(Vector<MindtouchPage> pages) {
if (!this.running) return pages;
for (MindtouchPage page : pages) {
log.debug("...Getting file data for: " + getFilename(page));
//get the content for this page
page.attachments = getAttachments(page.id);
//get the content for subpages
getAttachments(page.getSubpages());
}
return pages;
}
protected String getAttachments(String id) {
if (!this.running) return null;
return getPagePart(id, "files", "");
}
/* XXX Start OutputPageData methods */
protected void outputPageData(Vector<MindtouchPage> pages) {
if (!this.running) return;
String output = getOutput();
File outdir = cleanOutputDir(output);
log.info("Writing to file system.");
outputPageData(pages, outdir);
}
protected void outputPageData(Vector<MindtouchPage> pages, File outdir) {
for (MindtouchPage page : pages) {
if (!this.running) return;
//write page content file
String filename = getFilename(page);
String content = getFileContent(page);
File file = createFile(filename, outdir);
log.debug("...Writing page content to: " + filename);
write(file, content);
//attachment directory
if (page.hasAttachments()) {
String attdir = getAttFilename(page);
File attdirFile = createDir(attdir, outdir);
outputAttachmentData(page, attdirFile);
}
//subpages directory
if (!page.getSubpages().isEmpty()) {
String subdir = getSubFilename(page);
File subdirFile = createDir(subdir, outdir);
outputPageData(page.getSubpages(), subdirFile);
}
}
}
protected String getFilename(MindtouchPage page) {
return page.id + FILESYS_DELIM + getFilesystemTitle(page.title) + EXT_FILE;
}
Pattern notwordchar = Pattern.compile("\\W");
protected String getFilesystemTitle(String input) {
Matcher notwordFinder = notwordchar.matcher(input);
return notwordFinder.replaceAll("");
}
protected String getFileContent(MindtouchPage page) {
return FILECONTENT_TAGSTART +
page.content + page.tags + page.comments + page.attachments +
FILECONTENT_TAGEND;
}
protected File createFile(String filename, File parent) {
String abspath = parent.getAbsolutePath() + File.separator + filename;
return new File(abspath);
}
protected void write(File file, String content) {
try {
FileOutputStream fw = new FileOutputStream(file);
OutputStreamWriter outstream = new OutputStreamWriter(fw,
"utf-8");
BufferedWriter out = new BufferedWriter(outstream);
out.write(content);
out.close();
} catch (IOException e) {
log.error("Problem writing to file: " + file.getAbsolutePath());
e.printStackTrace();
}
}
protected String getAttFilename(MindtouchPage page) {
return page.id + FILESYS_DELIM + getFilesystemTitle(page.title) +
FILESYS_DELIM + EXT_ATT;
}
// output the actual attachments using the attachment data and another rest call
protected void outputAttachmentData(MindtouchPage page, File attdir) {
if (!this.running) return;
//get the file ids from the saved xml
String xml = page.attachments;
MindtouchFileIdParser parser = new MindtouchFileIdParser();
parseXml(xml, parser);
Vector<String> ids = parser.getIds();
Vector<String> names = parser.getNames();
//get the files from the rest api and save to file system
outputAttachmentData(ids, names, attdir);
}
protected void outputAttachmentData(Vector<String> ids, Vector<String> names, File attdir) {
if (!this.running) return;
String parent = attdir.getAbsolutePath();
if (!parent.endsWith(File.separator)) parent += File.separator;
String baseurl = getBaseUrl();
int port = getPort();
String user = getUser();
String pass = getPass();
String apiurl = createApiUrl(baseurl);
String forceAuthentication = "?authenticate=true";
for (int i = 0; i < ids.size(); i++) {
String id = ids.get(i);
String name = names.get(i);
log.debug("...Downloading file " + name);
//http://192.168.2.247/@api/deki/files/fileid
//http://developer.mindtouch.com/Deki/API_Reference/GET%3afiles%2f%2f%7bfileid%7d
String url = apiurl + "files/" + id + "/" + forceAuthentication;
GetMethod method = new GetMethod(url);
HttpClient client = getClient();
authenticate(baseurl, port, user, pass, method, client);
//get file from rest as stream
byte[] response = null;
try {
response = getFile(method, client);
} catch (Exception e) {
log.error("Problem occured when making rest request to: " + url);
e.printStackTrace();
return;
}
//write bytes as file
String path = parent + name;
try {
writeFile(response, path);
} catch (IOException e) {
log.error("Problem writing to file: " + path);
e.printStackTrace();
}
}
}
private byte[] getFile(HttpMethod method, HttpClient client) throws HttpException, IOException {
byte[] response = null;
try {
int result = client.executeMethod(method);
if (result != 200) {
handleError(result, method);
}
//output them to the filesystem in the attdir
response = method.getResponseBody();
} finally {
method.releaseConnection();
}
return response;
}
/**
* writes the given bytes to the file at the given path
* @param path string, filepath where text will be written
* @param bytes byte array, btyes to write to filepath
* @throws IOException
*/
protected void writeFile(byte[] bytes, String path) throws IOException {
FileOutputStream filestream = null;
BufferedOutputStream outstream = null;
try {
filestream = new FileOutputStream(path);
outstream = new BufferedOutputStream(filestream);
outstream.write(bytes);
} finally {
outstream.close();
filestream.close();
}
}
protected String getSubFilename(MindtouchPage page) {
return page.id + FILESYS_DELIM + getFilesystemTitle(page.title) +
FILESYS_DELIM + EXT_SUB;
}
protected File createDir(String newdir, File parentdir) {
String parent = parentdir.getAbsolutePath();
if (!parent.endsWith(File.separator)) parent += File.separator;
String path = parent + newdir;
File dirFile = new File(path);
if (dirFile.mkdir()) {
return dirFile;
}
log.error("Problem creating directory: " + path);
return dirFile;
}
/* XXX End OutputPageData methods */
protected String getPagePart(String id, String pagepart, String parameters) {
if (!this.running) return null;
String baseurl = getBaseUrl();
int port = getPort();
String user = getUser();
String pass = getPass();
String apiurl = createApiUrl(baseurl);
String forceAuthentication = "?authenticate=true";
//rest api for getting one page's content
//http://developer.mindtouch.com/Deki/API_Reference/GET%3apages%2f%2f%7bpageid%7d%2f%2fcontents
if (!"".equals(parameters)) parameters = "&" + parameters;
String url = apiurl + "pages/" + id + "/" + pagepart + "/" + forceAuthentication + parameters;
GetMethod method = new GetMethod(url);
HttpClient client = getClient();
authenticate(baseurl, port, user, pass, method, client);
try {
int result = client.executeMethod(method);
if (result != 200) {
handleError(result, method);
}
return method.getResponseBodyAsString();
} catch (Exception e) {
log.error("Problem occured when making rest request to: " + url);
e.printStackTrace();
} finally {
method.releaseConnection();
}
return null;
}
protected String getFileInfo(String id, String pagepart, String parameters) {
if (!this.running) return null;
String baseurl = getBaseUrl();
int port = getPort();
String user = getUser();
String pass = getPass();
String apiurl = createApiUrl(baseurl);
String forceAuthentication = "?authenticate=true";
if (!"".equals(parameters)) parameters = "&" + parameters;
String url = apiurl + "files/" + id + "/" + pagepart + "/" + forceAuthentication + parameters;
GetMethod method = new GetMethod(url);
HttpClient client = getClient();
authenticate(baseurl, port, user, pass, method, client);
try {
int result = client.executeMethod(method);
if (result != 200) {
handleError(result, method);
}
return method.getResponseBodyAsString();
} catch (Exception e) {
log.error("Problem occured when making rest request to: " + url);
e.printStackTrace();
} finally {
method.releaseConnection();
}
return null;
}
protected String createApiUrl(String input) {
if (!input.endsWith("/")) input += "/";
input += "@api/deki/";
if (!input.startsWith("http")) input = "http://" + input;
return input;
}
/**
* deletes and recreates the output directory
*/
protected File cleanOutputDir(String output) {
if (!this.running) return null;
if (!output.endsWith(File.separator)) output += File.separator;
output = output + EXPORT_DIR;
File file = new File(output);
if (!file.exists()) {
log.info("Creating output directory: " + output);
file.mkdir();
}
else {
deleteDir(file);
file.mkdir();
log.info("Cleaning and creating output directory: " + output);
}
return file;
}
/**
* deletes the given file. This method is used recursively.
* @param file can be a directory or a file. Directory does not have to be empty.
*/
protected void deleteDir(File file) {
//if file doesn't exist (shouldn't happen), just exit
if (!file.exists()) return;
String name = "";
try {
name = file.getCanonicalPath();
} catch (IOException e) {
log.error("Problem while deleting directory. No filename!");
e.printStackTrace();
}
//delete the file
if (file.delete()) {
return;
}
else { // or delete the directory
File[] files = file.listFiles();
for (File f : files) {
deleteDir(f);
}
file.delete();
}
}
public void cancel() {
this.running = false;
}
/* Getters and Setters */
public Map getProperties() {
return properties;
}
private String getBaseUrl() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_URLBASE);
if (prop == null) {
log.error("Problem examining exporter settings.");
throw new IllegalArgumentException("url.base must be defined. See exporter.mindtouch.properties");
}
return prop;
}
private int getPort() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_URLPORT);
if (prop == null) prop = DEFAULT_PORT; //default port;
return Integer.parseInt(prop);
}
private String getUser() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_USER);
if (prop == null) {
log.error("Problem examining exporter settings.");
throw new IllegalArgumentException("user must be defined. See exporter.mindtouch.properties");
}
return prop;
}
private String getPass() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_PASS);
if (prop == null) prop = ""; //default
return prop;
}
Pattern notdigit = Pattern.compile("\\D");
private int getBufferSize() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_BUFFERSIZE);
if (prop == null || notdigit.matcher(prop).find()) prop = DEFAULT_BUFFERSIZE; //default
return Integer.parseInt(prop);
}
private String getOutput() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_OUTPUTDIR);
if (prop == null) {
log.error("Problem examining exporter settings.");
throw new IllegalArgumentException("output.dir must be defined. See exporter.mindtouch.properties");
}
return prop;
}
private HttpClient getClient() {
if (this.client == null) {
this.client = new HttpClient();
HttpConnectionManager httpConnectionManager = client.getHttpConnectionManager();
HttpConnectionManagerParams params = httpConnectionManager.getParams();
params.setConnectionTimeout(getTimeout());
}
return this.client;
}
private int getTimeout() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_TIMEOUT);
if (prop == null) prop = DEFAULT_TIMEOUT_MS;
return Integer.parseInt(prop);
}
private boolean isIgnoringMindtouch() {
String prop = (String) properties.get(MindtouchExporter.PROPKEY_IGNOREMT);
if (prop == null) prop = DEFAULT_IGNOREMT;
return Boolean.parseBoolean(prop);
}
public int getError() {
return error;
}
public void setProperties(Map properties) { //useful for unit testing
this.properties = properties;
}
protected void startRunning() { //useful for junit
this.running = true;
}
public class BadResult extends RuntimeException {
public BadResult(String message) {
super(message);
}
}
}