blob: 817e6cfc01e4a4cded79990d4d35f831a31a86a1 [file] [log] [blame]
package com.atlassian.uwc.converters.moinmoin;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import com.atlassian.uwc.converters.BaseConverter;
import com.atlassian.uwc.converters.tikiwiki.RegexUtil;
import com.atlassian.uwc.prep.MoinMoinPreparation;
import com.atlassian.uwc.ui.ConverterEngine;
import com.atlassian.uwc.ui.Page;
/**
* Pre-processes file names in images and attachments so that the ImageAttachmentConverter can
* find them. This class handles attachments linked with "attachment:", "inline:" and "drawing:",
* but note that the drawings will lose any image maps and you will not be able to edit them in
* Confluence like in MoinMoin.
*
* <p>Examples:
* <ul>
* <li> "foo attachment:image.png bar" is converted to "foo !image.png! bar"
* <li> "foo attachment:page/subpage/image.png bar" is converted to "foo !subpage^image.png! bar".
* There is no need to specify the path since page names in Confluence are unique within a space.
* </ul>
*
* <h3>Notes</h3>
* <p>This converter only stores attachments that appear on the page (as a link or image). Any other
* attachments will <em>NOT</em> be stored in Confluence, even if they are linked to from other pages.
*
* <p>The converter assumes that file and path names don't contain whitespace and that links are always
* followed by whitespace (unless they are right at the end of the file). If that is not true for your
* wiki, you will have to change the method <code>findLinkEnd()</code>.
*
* @author Rolf Staflin (rstaflin)
*/
public class MoinMoinAttachmentConverter extends BaseConverter {
/** log4j logger used for this converter's debug, info and error messages. */
private Logger log = Logger.getLogger(MoinMoinAttachmentConverter.class);
// MoinMoin link prefixes. They are not referenced by any active code in this file.
private static final String ATTACHMENT = "attachment:";
private static final String INLINE = "inline:";
private static final String DRAWING = "drawing:";
/** Name of the sub-directory, below a page's own directory, in which attachment files are looked up. */
private static final String ATTACHMENT_DIR = "attachments";
/** Regular-expression form of {@link #UTF8_FILE_SEPARATOR}, i.e. with the parentheses escaped. */
private static final String UTF8_FILE_SEPARATOR_REGEX = "\\(2f\\)";
/** The sequence "(2f)" that separates path elements in stored page names (0x2f is the code of '/'). */
private static final String UTF8_FILE_SEPARATOR = "(2f)";
/**
 * Converts any attachment links from the MoinMoin format to Confluence's format and
 * attaches the files to the page.
 *
 * <p>The work is done in this order:
 * <ol>
 * <li>the page's original text is run through <code>convertAttachments()</code> and stored
 * as the converted text;
 * <li>the page name and path are rewritten by <code>setupNameAndPath()</code>;
 * <li>every regular file found in the page's attachment directory is added to the page.
 * </ol>
 *
 * <p>The older index-based implementation is still available in
 * <code>handleAttachment()</code> but is no longer called from here.
 *
 * @param page A page with text to be converted.
 */
public void convert(Page page) {
    // Remember the name the page has on disk. setupNameAndPath() rewrites the page name
    // (separators, underscores, capitalisation), but the attachment directory is named
    // after the original file, which is also the name attachfile() uses.
    String originalName = page.getName();

    String input = page.getOriginalText();
    input = convertAttachments(input, page);
    page.setConvertedText(input);

    // Fix the name and path of the page.
    setupNameAndPath(page);
    log.debug("<convert(" + page.getName() + ")");

    String pagename = (originalName == null) ? "" : originalName;
    // Remove the extension, if present
    if (pagename.endsWith(MoinMoinPreparation.EXTENSION)) {
        pagename = pagename.substring(0, pagename.length() - MoinMoinPreparation.EXTENSION.length());
    }

    String baseDir = this.getAttachmentDirectory() + File.separator + pagename + File.separator + ATTACHMENT_DIR;
    log.info("Attachment Path: " + baseDir);
    File base = new File(baseDir);
    log.info("Attachment File: " + base.toString());

    // listFiles() returns null when the path does not exist, is not a directory,
    // or cannot be read; in all of those cases there is nothing to attach.
    File[] candidates = base.listFiles();
    if (candidates == null) {
        return;
    }
    for (File f : candidates) {
        if (f.isFile()) {
            log.info("Adding Attachment: " + f.getAbsolutePath());
            page.addAttachment(f);
        }
    }
}
/**
 * Matches bracketed "attachment:" and "inline:" markup ("drawing:" is not matched).
 * <ul>
 * <li>group 1: the opening '{' or '[', which may be doubled
 * <li>group 2: the target, one or more characters other than '}', ']' and '|'
 * <li>group 3: optional text following a '|' (the "alt" text); null when absent
 * <li>group 4: the closing '}' or ']', which may be doubled
 * </ul>
 * The closing bracket is not required to be of the same kind as the opening one.
 */
Pattern attachment = Pattern.compile("([{\\[])"+
"\\1?" +
"(?:(?:attachment)|(?:inline)):"+
"([^}\\]\\|]+)" +
// Potentially the target is followed by | and an "alt" text
"(?:\\|([^}\\]]+))?" +
"([}\\]])\\4?");
/** Matches a '/' and captures, as group 1, the run of non-'/' characters that follows it. */
Pattern pagedelim = Pattern.compile("\\/([^/]*)");
/**
 * Rewrites every bracketed "attachment:"/"inline:" reference in the text to Confluence
 * markup and tries to attach the referenced file to the page.
 *
 * <p>References opened with '{' become images (<code>!target!</code>, with the alt text,
 * if any, appended as <code>|alt=... title=...</code>). References opened with '[' become
 * links (<code>[^file]</code>, or <code>[page^file]</code> when the target contains a '/').
 *
 * @param input the page text to convert
 * @param page the page that receives the attachments
 * @return the converted text, or <code>input</code> itself when nothing matched
 */
private String convertAttachments(String input, Page page) {
    Matcher attachmentFinder = attachment.matcher(input);
    // Matcher.appendReplacement() requires a StringBuffer.
    StringBuffer sb = new StringBuffer();
    boolean found = false;
    while (attachmentFinder.find()) {
        found = true;
        String type = attachmentFinder.group(1);
        String target = attachmentFinder.group(2);
        String altText = attachmentFinder.group(3);
        String replacement;

        Matcher pagedelimfinder = pagedelim.matcher(target);
        boolean haspagename = false;
        String filename = target;
        if (pagedelimfinder.find()) {
            // NOTE(review): only the first '/' is treated as the page/file delimiter, so a
            // target with several path elements keeps its remaining slashes - confirm that
            // this is the intended handling of nested pages.
            filename = pagedelimfinder.group(1);
            // The file name is user data: quote it so that '$' and '\' are inserted
            // literally instead of being read as group references or escapes.
            target = pagedelimfinder.replaceFirst(Matcher.quoteReplacement("^" + filename));
            haspagename = true;
        }

        if (type.startsWith("{")) { //inline
            if (altText != null && !altText.isEmpty()) {
                target = target + "|alt=" + altText + " title=" + altText;
            }
            replacement = "!" + target + "!";
        } else { //link
            if (!haspagename) {
                target = "^" + target;
            }
            replacement = "[" + target + "]";
        }

        replacement = RegexUtil.handleEscapesInReplacement(replacement);
        attachmentFinder.appendReplacement(sb, replacement);
        attachfile(filename, page);
    }
    if (found) {
        attachmentFinder.appendTail(sb);
        return sb.toString();
    }
    return input;
}
/**
 * Looks a file up in the attachment directory of the given page and, when it exists,
 * adds it to the page. A missing file is reported through the error log only.
 *
 * @param filename name of the file inside the page's "attachments" directory
 * @param page the page whose name selects the directory and which receives the attachment
 */
private void attachfile(String filename, Page page) {
    // The directory on disk carries the page name without a trailing ".txt".
    final String pageDir = page.getName().replaceFirst("\\.txt$", "");
    final String attachmentPath = getAttachmentDirectory()
            + File.separator + pageDir
            + File.separator + "attachments"
            + File.separator + filename;

    final File candidate = new File(attachmentPath);
    if (candidate.exists()) {
        page.addAttachment(candidate);
        return;
    }
    log.error("Could not find attachment: " + attachmentPath);
}
/**
 * Handles converting a link of the form "protocol:path/to/page/filename.ext".
 *
 * <p>If the path is not present ("protocol:filename.ext") the link refers to an attachment
 * of the current page. The file is then looked up below the page's own attachment directory
 * and added as an attachment to the Confluence page being converted.
 *
 * <p>If the file is recognised as an image, the link is converted to
 * "!page^filename.ext!", otherwise it is converted to "[page^filename.ext]".
 *
 * <p>This is the older, index-based implementation; nothing in this class calls it any more.
 *
 * @param text The page text. The contents of this buffer is altered by this method.
 * @param linkStart index of the first letter of the protocol name
 * @param page A page object corresponding to the page being converted
 * @param forcedExtension If not <code>null</code>, this string is appended to the end of the filename.
 * Example: ".png"
 */
private void handleAttachment(StringBuffer text, int linkStart, Page page, String forcedExtension) {
    if (log.isDebugEnabled()) {
        log.debug(">handleAttachment(" + linkStart + ")");
    }
    int fileNameStart = text.indexOf(":", linkStart) + 1;
    int linkEnd = findLinkEnd(fileNameStart, text);

    // newLink will hold the new link markup.
    StringBuffer newLink = new StringBuffer();
    String filePath = text.substring(fileNameStart, linkEnd);
    // A trailing ']' means the link already sits inside square brackets.
    boolean namedAttachment = filePath.endsWith("]");

    // Append the forced extension, if any
    if (forcedExtension != null && forcedExtension.length() > 0) {
        filePath += forcedExtension;
    }

    // Get rid of brackets inserted by the link syntax regex converters.
    filePath = filePath.replace("[", "").replace("]", "");

    // If the link leads to another page, linkPage will hold its name
    String linkPage = null;
    if (filePath.contains("/")) {
        // Swedish debug message; the a-ring is written as a Unicode escape so that it
        // survives whatever encoding the source file is stored in.
        log.debug(filePath + " inneh\u00e5ll /!");
        // This is a link to an attachment on some other page.
        // The page name is the last path element in front of the file name.
        String path = filePath.substring(0, filePath.lastIndexOf("/"));
        linkPage = path.substring(path.lastIndexOf("/") + 1);
    } else {
        // This is a link to an attachment on this page. We need to
        // move the attachment to Confluence!
        log.debug(filePath + " inneh\u00e5ll INTE /!");
        String pagename = page.getName();
        // Remove the extension, if present
        if (pagename.endsWith(MoinMoinPreparation.EXTENSION)) {
            pagename = pagename.substring(0, pagename.length() - MoinMoinPreparation.EXTENSION.length());
        }
        filePath = pagename + File.separator + ATTACHMENT_DIR + File.separator + filePath;
        // Add the file as an attachment to the Confluence page
        addAttachment(filePath, page);
    }

    // Now decide if this is an image or some other attachment that should be linked instead,
    // and create a link according to the type.
    File file = new File(filePath);
    if (isImage(file)) {
        makeImageTag(newLink, linkPage, file.getName());
    } else {
        makeAttachmentTag(newLink, linkPage, file.getName(), !namedAttachment);
    }

    // Make the change
    text.replace(linkStart, linkEnd, newLink.toString());
    if (log.isDebugEnabled()) {
        log.debug("<handleAttachment() -- new link: " + newLink);
    }
}
/**
 * Figures out where a link file name ends, given its beginning.
 * The file name is taken to end at the first whitespace character, at the first
 * character contained in <code>ATTACHMENT_ENDING_CHARS</code>, or at the end of the text.
 * A '.' directly in front of that position is not counted as part of the name.
 *
 * @param fileNameStart Index of the first letter of the file name
 * @param text The text to search through
 * @return The index of the first letter past the end of the file name.
 * This equals the length of the text if the file name is at the end of it,
 * and is never smaller than <code>fileNameStart</code>.
 */
private int findLinkEnd(int fileNameStart, StringBuffer text) {
    int linkEnd = fileNameStart;
    while (linkEnd < text.length()
            && !Character.isWhitespace(text.charAt(linkEnd))
            && !ATTACHMENT_ENDING_CHARS.contains(text.charAt(linkEnd))) {
        linkEnd++;
    }
    // Drop a trailing '.', but only when it belongs to the scanned name: with an empty
    // name there is nothing to drop, and looking one character back could leave the
    // buffer altogether.
    if (linkEnd > fileNameStart && text.charAt(linkEnd - 1) == '.') {
        linkEnd--;
    }
    return linkEnd;
}
/** Characters that, like whitespace, terminate an attachment file name. The set is read-only. */
public static final Set<Character> ATTACHMENT_ENDING_CHARS =
        Collections.unmodifiableSet(new HashSet<Character>(
                Arrays.asList('*', ',', ';', ')', '(', '|', ':', '!')));
/**
 * Tells whether the content type that <code>ConverterEngine</code> determines for the
 * file starts with "image".
 *
 * @param file the file to classify
 * @return <code>true</code> if the content type starts with "image"
 */
private boolean isImage(File file) {
    return ConverterEngine.determineContentType(file).startsWith("image");
}
/**
 * Appends image markup of the form "!fileName!" or, when a page name is given,
 * "!linkPage^fileName!" to the buffer.
 *
 * @param newLink buffer that receives the markup
 * @param linkPage name of the page holding the image; may be <code>null</code> or empty
 * @param fileName name of the image file
 */
private void makeImageTag(StringBuffer newLink, String linkPage, String fileName) {
    final boolean hasPage = linkPage != null && linkPage.length() > 0;
    newLink.append('!');
    if (hasPage) {
        newLink.append(linkPage);
        newLink.append('^');
    }
    newLink.append(fileName);
    newLink.append('!');
}
/**
 * Appends attachment link markup of the form "[linkPage^fileName]" to the buffer.
 * The page name is left out when it is <code>null</code> or empty. The closing bracket
 * is always written; the opening bracket only on request.
 *
 * @param newLink buffer that receives the markup
 * @param linkPage name of the page holding the attachment; may be <code>null</code> or empty
 * @param fileName name of the attached file
 * @param needSquareBrackets whether the opening '[' has to be written as well
 */
private void makeAttachmentTag(StringBuffer newLink, String linkPage, String fileName, boolean needSquareBrackets) {
    final String opening = needSquareBrackets ? "[" : "";
    final String pagePart = (linkPage == null || linkPage.length() == 0) ? "" : linkPage;
    newLink.append(opening)
            .append(pagePart)
            .append('^')
            .append(fileName)
            .append(']');
}
/**
 * Registers a file as an attachment of the page. The file is not checked for existence.
 *
 * @param filePath path of the file, relative to the attachment directory
 * @param page the page that is to receive the attachment
 */
private void addAttachment(String filePath, Page page) {
    final String location = this.getAttachmentDirectory() + File.separator + filePath;
    page.addAttachment(new File(location));
}
/**
 * Changes the name of a page.
 * <code>MoinMoinPreparation</code> stores the page files
 * as "path(2f)to(2f)the(2f)page.uwc". This method strips the extension, replaces each
 * "(2f)" separator with a space, decodes any remaining "(hex)" sequences, replaces
 * '_' with a space and ':' with " -", and capitalises the first character.
 * If the resulting name still contains the platform's file separator, the part in front
 * of the last separator becomes the page path and the rest the page name; otherwise the
 * path is set to the empty string.
 *
 * @param page the page object to set up.
 */
private void setupNameAndPath(Page page) {
    if (log.isDebugEnabled()) {
        log.debug(">setupNameAndPath(" + page.getName() + ")");
    }
    String name = page.getName();
    if (name == null) {
        name = "";
    }
    if (name.endsWith(MoinMoinPreparation.EXTENSION)) {
        name = name.substring(0, name.length() - MoinMoinPreparation.EXTENSION.length());
    }

    // sub-pages management
    if (name.contains(UTF8_FILE_SEPARATOR)) {
        String newName = name.replaceAll(UTF8_FILE_SEPARATOR_REGEX, " ");
        log.info(name.replaceAll(UTF8_FILE_SEPARATOR_REGEX, "/")
                + " renamed to: \"" + newName + "\".");
        name = newName;
    }

    name = convertPageNameToUnicode(name);
    // Plain character substitutions: no regular expression needed.
    name = name.replace("_", " ");
    // ':' is illegal in a Confluence page name.
    name = name.replace(":", " -");

    String path = "";
    int pathEnd = name.lastIndexOf(File.separator);
    if (pathEnd >= 0) {
        path = name.substring(0, pathEnd);
        name = name.substring(pathEnd + 1);
    }
    page.setPath(path);

    // Finally start the page name with an uppercase character. This must also
    // cover names that consist of a single character.
    if (name.length() > 0) {
        name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
    }
    page.setName(name);

    if (log.isDebugEnabled()) {
        log.debug("<setupNameAndPath() Path: " + page.getPath() + ", Name: \"" + page.getName() + "\"");
    }
}
/**
 * Decodes the "(hex)" escape sequences of a stored page name.
 * Needed for pages only: attachments names are already Unicode.
 *
 * <p>Every parenthesised group must hold an even number of hexadecimal digits. Each pair
 * of digits is one byte, and the bytes of a group are decoded as UTF-8; "(2f)" therefore
 * becomes "/", and an empty group "()" is simply removed. If any group is malformed
 * (missing ')', odd number of digits, or a character that is not a hex digit), the name
 * is returned unchanged.
 *
 * @param asciiName the stored page name; must not be <code>null</code>
 * @return the decoded name, or <code>asciiName</code> itself if it cannot be decoded
 */
public static String convertPageNameToUnicode(String asciiName) {
    final int length = asciiName.length();
    final StringBuilder result = new StringBuilder(length);
    final java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");

    int i = 0;
    while (i < length) {
        final char current = asciiName.charAt(i);
        if (current != '(') {
            result.append(current);
            i++;
            continue;
        }

        // Locate the end of the escape group and validate its shape up front,
        // instead of relying on exceptions to detect malformed input.
        final int close = asciiName.indexOf(')', i + 1);
        if (close < 0) {
            return asciiName;
        }
        final int hexLength = close - (i + 1);
        if (hexLength % 2 != 0) {
            return asciiName;
        }

        final byte[] utf8Bytes = new byte[hexLength / 2];
        for (int k = 0; k < utf8Bytes.length; k++) {
            final int high = Character.digit(asciiName.charAt(i + 1 + 2 * k), 16);
            final int low = Character.digit(asciiName.charAt(i + 2 + 2 * k), 16);
            if (high < 0 || low < 0) {
                return asciiName;
            }
            utf8Bytes[k] = (byte) ((high << 4) | low);
        }
        result.append(new String(utf8Bytes, utf8));
        i = close + 1;
    }
    return result.toString();
}
}