blob: 4c0d0e3420b1950f0b3c56b059edc01d5128dab8 [file] [log] [blame]
package com.atlassian.uwc.converters.jotspot;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import com.atlassian.uwc.converters.BaseConverter;
import com.atlassian.uwc.ui.ConfluenceSettingsForm;
import com.atlassian.uwc.ui.Page;
/**
* Prepares jotspot attachments for uploading to Confluence.
* NOTE: This class was heavily influenced by TwikiPrepareAttachmentFilesConverter
* @author Laura Kolker
*/
public class AttachmentConverter extends BaseConverter {
private static final String JOTSPOT_IMAGE_DIR = "/_data/";
private static final String JOTSPOT_IMAGE_DATA = "attach.URI.dat";
Logger log = Logger.getLogger(this.getClass());
public void convert(Page page) {
log.debug("Converting Jotspot Attachments -- starting");
// scan the page and create a list of attachments
addAttachmentsToPage(page);
log.debug("Converting Jotspot Attachments -- complete");
}
/**
* looks for attachments and attaches them
* @param page object to attach pages to
*/
protected void addAttachmentsToPage(Page page) {
//two types of attachments, images and attachments
//images
Vector<String> imagepaths = getImagePaths(page.getOriginalText());
attach(imagepaths, page);
//attachments
Vector<String> attachmentPaths = getAttachmentPaths(page);
attach(attachmentPaths, page);
}
/**
* attaches given String Vector of paths to page
* @param paths
* @param page
*/
private void attach(Vector<String> paths, Page page) {
//foreach path in paths
for (String path : paths) {
//get the complete path to the file
log.debug("complete path = " + path);
//confirm existance of file
File file = new File(path);
if (!file.exists() || file.isDirectory())
continue;
//attach the file
log.debug("adding attachment: " + file.getName());
page.addAttachment(file);
}
}
String correctPath = "(.*\\/([^/]+)\\" + JOTSPOT_IMAGE_DIR + ")";
Pattern correctPathPattern = Pattern.compile(correctPath);
/**
* changes the jotspot default imagename (which is a .dat) to the
* actual image name, by examining the path to said image
* @param path default path
* <br>For example:
* /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/attach.URI.dat
* @return useful path
* <br/>For example:
* /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/hobbespounce.gif
*/
private String getUsefulImagePath(String path) {
log.debug("renaming path = " + path);
Matcher correctPathFinder = correctPathPattern.matcher(path);
if (correctPathFinder.lookingAt()) {
String correctName = correctPathFinder.group(2);
String correctDir = correctPathFinder.group(1);
//check for ascii char encodings (only encode directory
//as wiki syntax won't find encoded character filenames)
correctDir = encodeChars(correctDir);
path = correctDir + correctName;
log.debug("correct path = " + path);
}
return path;
}
Pattern nonfilenameChars = Pattern.compile("[^-\\w+ \\.\\/\\\\]");
/**
* checks for non filename safe characters (like , (comma) for example)
* and encodes thems.
* @param string For example:
* thisStringHasA,comma
* @return encoded nonword characters, For example:
* thisStringHasA%2Ccomma
*/
protected String encodeChars(String string) {
String encoding = "UTF-8";//FIXME long term, provide property for this?
Matcher nonwordcharFinder = nonfilenameChars.matcher(string);
StringBuffer sb = new StringBuffer();
//find the nonfilename characters
while (nonwordcharFinder.find()) {
String badChar = nonwordcharFinder.group();
String encoded = null;
//and encode them.
try {
encoded = URLEncoder.encode(badChar, encoding);
} catch (UnsupportedEncodingException e) {
log.error("Problem with encoding: " + encoding);
e.printStackTrace();
}
nonwordcharFinder.appendReplacement(sb, encoded);
}
nonwordcharFinder.appendTail(sb);
String encodedStr = sb.toString();
if (encodedStr == null || "".equals(encodedStr))
encodedStr = string;
return encodedStr;
}
String imgSrc = "<img src=\"([^\"]+)\"[^/]*/>";
Pattern imgPattern = Pattern.compile(imgSrc);
/**
* @param input page text
* @return list of absolute paths to images
*/
private Vector<String> getImagePaths(String input) {
Vector<String> paths = new Vector<String>();
//get paths from img src using regex
Matcher imgFinder = imgPattern.matcher(input);
String root = this.getAttachmentDirectory();
while (imgFinder.find()) {
String path = imgFinder.group(1);
if (!path.startsWith("\\/")) path = "/" + path;
path = root + path; //this is actually an image directory !
String tmpPath = path + JOTSPOT_IMAGE_DIR + JOTSPOT_IMAGE_DATA; //here's where the image is
String toPath = getUsefulImagePath(tmpPath); //but that's a useless name for the file, so we change it
tmpPath = encodeChars(tmpPath); //only encode directory, as wiki syntax won't find encoded character filenames
File tmpFile = new File(tmpPath);
File toFile = new File(toPath);
if (!tmpFile.renameTo(toFile)) { //this happens when there's something there already
log.debug("Can't rename " + tmpFile + " to " + toFile);
}
log.debug("getImagePaths path = " + toPath);
paths.add(toPath);
}
return paths;
}
/**
* figures out what attachments are associated with this page.
* @param page
* @return String vector of absolute paths to attachments
*/
private Vector<String> getAttachmentPaths(Page page) {
Vector<String> paths = new Vector<String>();
//get the directory with the same name as this page
File pageDir = getPageDir(page);
if (pageDir == null)
return paths;
//go down one level
File files[] = pageDir.listFiles();
//open up the xml files
for (File file : files) {
//check for existence and non-directoryness
if (!file.exists() || file.isDirectory())
continue;
//slurp string
String fileContents = read(file);
// log.debug("fileContents = " + fileContents);
if (isAttachment(fileContents)) {
String path = getPath(fileContents);
String root = this.getAttachmentDirectory();
String basePath = encodeChars(root + path + JOTSPOT_IMAGE_DIR);
String tmpPath = basePath + JOTSPOT_IMAGE_DATA;
path = basePath + getFilename(path);
File tmpFile = new File(tmpPath);
File toFile = new File(path);
if (!tmpFile.renameTo(toFile)) { //this happens when there's already a file there
log.debug("Can't rename " + tmpPath + " to " + path);
}
log.debug("attachment path = " + path);
if (path == null)
continue;
paths.add(path);
}
}
return paths;
}
/**
* @param path
* @return filename for a given filepath.
* <br/>Example:
* <br/>path = /Dirpath/myfile.txt
* <br/>return = myfile.txt
*/
private String getFilename(String path) {
File file = new File(path);
return file.getName();
}
Pattern attachmentPath = Pattern.compile("<node[^>]+?path=\"([^\"]*)\"");
/**
* Uses the given file contents to determine the path to the attachment
* @param fileContents
* @return
*/
private String getPath(String fileContents) {
log.debug("non image attachment!!!");
Matcher pathFinder = attachmentPath.matcher(fileContents);
if (pathFinder.find()) {
String path = pathFinder.group(1);
path = path.replaceAll(" ", "+");
log.debug("path = " + path);
return path;
}
return null;
}
Pattern attachmentClue = Pattern.compile("nodeClass=\"attachment\"");
/**
* Uses the given file contents to determine if that file represents
* an attachment
* @param fileContents
* @return true if fileContents represents an attachment
*/
private boolean isAttachment(String fileContents) {
Matcher attachmentClueFinder = attachmentClue.matcher(fileContents);
return attachmentClueFinder.find();
}
/**
* reads the contents of the file
* @param file file is assumed to be an existing non-directory file
* @return the contents of the file as a string or null if file could
* not be found
*/
private String read(File file) {
String string = "";
try {
Scanner in = new Scanner(file);
while (in.hasNext()) {
string += in.next() + " ";
}
} catch (FileNotFoundException e) {
log.debug("Problem opening file: " + file.getName());
e.printStackTrace();
return null;
}
return string;
}
Pattern extension = Pattern.compile("(.*)\\.\\w+$");
/**
* figures out the associated directory for the given page
* @param page
* @return page directory or null if none exists
* <br/>Example:
* <br/>If the page exists at: /SomeDirectory/My+Page.xml
* <br/>returns /SomeDirectory/My+Page if that represents an existing directory
*/
private File getPageDir(Page page) {
String similarPath = page.getFile().getPath();
log.debug("similar = " + similarPath);
Matcher extFinder = extension.matcher(similarPath);
if (extFinder.lookingAt()) {
String dirPath = extFinder.group(1);
log.debug("dirPath = " + dirPath);
File file = new File(dirPath);
if (file.exists() && file.isDirectory()) {
return file;
}
}
return null;
}
}