src/main/java/com/atlassian/uwc/converters/jotspot/AttachmentConverter.java - universal-wiki-converter - Git at Google

 package com.atlassian.uwc.converters.jotspot;

 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.UnsupportedEncodingException;
 import java.net.URLEncoder;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.util.Scanner;
 import java.util.Vector;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.apache.log4j.Logger;

 import com.atlassian.uwc.converters.BaseConverter;
 import com.atlassian.uwc.ui.ConfluenceSettingsForm;
 import com.atlassian.uwc.ui.Page;

 /**
  * Prepares jotspot attachments for uploading to Confluence.
  * NOTE: This class was heavily influenced by TwikiPrepareAttachmentFilesConverter
  * @author Laura Kolker
  */
 public class AttachmentConverter extends BaseConverter {
     /** directory (below an attachment's own directory) holding the attachment's data file */
     private static final String JOTSPOT_IMAGE_DIR = "/_data/";
     /** generic name of the data file; it is renamed to the real filename before attaching */
     private static final String JOTSPOT_IMAGE_DATA = "attach.URI.dat";
     Logger log = Logger.getLogger(this.getClass());

     /**
      * Finds the images and attachments that belong to the given page and
      * attaches them to it.
      * @param page page being converted
      */
     public void convert(Page page) {
         log.debug("Converting Jotspot Attachments -- starting");
         // scan the page and create a list of attachments
         addAttachmentsToPage(page);
         log.debug("Converting Jotspot Attachments -- complete");
     }

     /**
      * looks for attachments and attaches them
      * @param page object to attach files to
      */
     protected void addAttachmentsToPage(Page page) {
         //two types of attachments, images and attachments

         //images
         Vector<String> imagepaths = getImagePaths(page.getOriginalText());
         attach(imagepaths, page);

         //attachments
         Vector<String> attachmentPaths = getAttachmentPaths(page);
         attach(attachmentPaths, page);
     }

     /**
      * attaches given String Vector of paths to page. Paths that do not
      * refer to an existing, non-directory file are skipped.
      * @param paths complete paths to the candidate files
      * @param page page to attach the files to
      */
     private void attach(Vector<String> paths, Page page) {
         for (String path : paths) {
             log.debug("complete path = " + path);

             //confirm existence of file
             File file = new File(path);
             if (!file.exists() || file.isDirectory()) {
                 continue;
             }

             //attach the file
             log.debug("adding attachment: " + file.getName());
             page.addAttachment(file);
         }
     }

     String correctPath = "(.*\\/([^/]+)\\" + JOTSPOT_IMAGE_DIR + ")";
     Pattern correctPathPattern = Pattern.compile(correctPath);
     /**
      * changes the jotspot default imagename (which is a .dat) to the
      * actual image name, by examining the path to said image
      * @param path default path
      * <br>For example:
      * /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/attach.URI.dat
      * @return useful path, or the given path unchanged if it does not
      * contain the jotspot data directory
      * <br/>For example:
      * /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/hobbespounce.gif
      */
     private String getUsefulImagePath(String path) {
         log.debug("renaming path = " + path);
         Matcher correctPathFinder = correctPathPattern.matcher(path);
         if (correctPathFinder.lookingAt()) {
             String correctName = correctPathFinder.group(2);
             String correctDir = correctPathFinder.group(1);
             //check for ascii char encodings (only encode directory
             //as wiki syntax won't find encoded character filenames)
             correctDir = encodeChars(correctDir);
             path = correctDir + correctName;
             log.debug("correct path = " + path);
         }

         return path;
     }

     Pattern nonfilenameChars = Pattern.compile("[^-\\w+ \\.\\/\\\\]");
     /**
      * checks for non filename safe characters (like , (comma) for example)
      * and encodes them.
      * @param string For example:
      * thisStringHasA,comma
      * @return encoded nonword characters, For example:
      * thisStringHasA%2Ccomma
      */
     protected String encodeChars(String string) {
         String encoding = "UTF-8";//FIXME long term, provide property for this?
         Matcher nonwordcharFinder = nonfilenameChars.matcher(string);
         StringBuffer sb = new StringBuffer();
         //find the nonfilename characters
         while (nonwordcharFinder.find()) {
             String badChar = nonwordcharFinder.group();
             //default to the raw character, so that a failed encoding can
             //never hand null to appendReplacement
             String encoded = badChar;
             try {
                 encoded = URLEncoder.encode(badChar, encoding);
             } catch (UnsupportedEncodingException e) {
                 log.error("Problem with encoding: " + encoding, e);
             }
             //quote the replacement so '$' and '\' are taken literally
             nonwordcharFinder.appendReplacement(sb, Matcher.quoteReplacement(encoded));
         }
         nonwordcharFinder.appendTail(sb);
         return sb.toString();
     }


     String imgSrc = "<img src=\"([^\"]+)\"[^/]*/>";
     Pattern imgPattern = Pattern.compile(imgSrc);
     /**
      * Collects the images referenced by img tags in the page text. As a
      * side effect, tries to rename each image's data file to the image's
      * real name.
      * @param input page text
      * @return list of absolute paths to images; empty if input is null
      */
     private Vector<String> getImagePaths(String input) {
         Vector<String> paths = new Vector<String>();
         if (input == null) {
             return paths;
         }

         //get paths from img src using regex
         Matcher imgFinder = imgPattern.matcher(input);
         String root = this.getAttachmentDirectory();
         while (imgFinder.find()) {
             String path = imgFinder.group(1);
             //startsWith takes a literal, not a regex, so the slash must not be escaped
             if (!path.startsWith("/")) {
                 path = "/" + path;
             }
             path = root + path; //this is actually an image directory !
             String tmpPath = path + JOTSPOT_IMAGE_DIR + JOTSPOT_IMAGE_DATA; //here's where the image is
             String toPath = getUsefulImagePath(tmpPath); //but that's a useless name for the file, so we change it
             tmpPath = encodeChars(tmpPath); //only encode directory, as wiki syntax won't find encoded character filenames
             File tmpFile = new File(tmpPath);
             File toFile = new File(toPath);
             if (!tmpFile.renameTo(toFile)) { //this happens when there's something there already
                 log.debug("Can't rename " + tmpFile + " to " + toFile);
             }

             log.debug("getImagePaths path = " + toPath);
             paths.add(toPath);
         }

         return paths;
     }

     /**
      * figures out what attachments are associated with this page. As a
      * side effect, tries to rename each attachment's data file to the
      * attachment's real name.
      * @param page
      * @return String vector of absolute paths to attachments
      */
     private Vector<String> getAttachmentPaths(Page page) {
         Vector<String> paths = new Vector<String>();

         //get the directory with the same name as this page
         File pageDir = getPageDir(page);
         if (pageDir == null) {
             return paths;
         }

         //go down one level
         File[] files = pageDir.listFiles();
         if (files == null) { //listFiles gives null when the directory can't be listed
             return paths;
         }

         //open up the xml files
         for (File file : files) {
             //check for existence and non-directoryness
             if (!file.exists() || file.isDirectory()) {
                 continue;
             }
             //slurp string
             String fileContents = read(file);
             if (fileContents == null || !isAttachment(fileContents)) {
                 continue;
             }
             String path = getPath(fileContents);
             if (path == null) { //attachment node without a path attribute
                 log.debug("no attachment path found in " + file.getName());
                 continue;
             }
             String root = this.getAttachmentDirectory();
             String basePath = encodeChars(root + path + JOTSPOT_IMAGE_DIR);
             String tmpPath = basePath + JOTSPOT_IMAGE_DATA;
             path = basePath + getFilename(path);
             File tmpFile = new File(tmpPath);
             File toFile = new File(path);
             if (!tmpFile.renameTo(toFile)) { //this happens when there's already a file there
                 log.debug("Can't rename " + tmpPath + " to " + path);
             }
             log.debug("attachment path = " + path);
             paths.add(path);
         }

         return paths;
     }


     /**
      * @param path
      * @return filename for a given filepath.
      * <br/>Example:
      * <br/>path = /Dirpath/myfile.txt
      * <br/>return = myfile.txt
      */
     private String getFilename(String path) {
         return new File(path).getName();
     }

     Pattern attachmentPath = Pattern.compile("<node[^>]+?path=\"([^\"]*)\"");
     /**
      * Uses the given file contents to determine the path to the attachment
      * @param fileContents
      * @return value of the first node path attribute, with spaces replaced
      * by +, or null if there is no such attribute
      */
     private String getPath(String fileContents) {
         log.debug("non image attachment!!!");
         Matcher pathFinder = attachmentPath.matcher(fileContents);
         if (pathFinder.find()) {
             String path = pathFinder.group(1).replace(' ', '+');
             log.debug("path = " + path);
             return path;
         }
         return null;
     }

     Pattern attachmentClue = Pattern.compile("nodeClass=\"attachment\"");
     /**
      * Uses the given file contents to determine if that file represents
      * an attachment
      * @param fileContents
      * @return true if fileContents represents an attachment
      */
     private boolean isAttachment(String fileContents) {
         return attachmentClue.matcher(fileContents).find();
     }

     /**
      * reads the contents of the file. The whitespace separated tokens of
      * the file are joined with single spaces.
      * @param file file is assumed to be an existing non-directory file
      * @return the contents of the file as a string or null if file could
      * not be found
      */
     private String read(File file) {
         StringBuilder contents = new StringBuilder();
         Scanner in = null;
         try {
             in = new Scanner(file);
             while (in.hasNext()) {
                 contents.append(in.next()).append(" ");
             }
         } catch (FileNotFoundException e) {
             log.debug("Problem opening file: " + file.getName(), e);
             return null;
         } finally {
             //release the file handle whether or not reading succeeded
             if (in != null) {
                 in.close();
             }
         }
         return contents.toString();
     }

     Pattern extension = Pattern.compile("(.*)\\.\\w+$");
     /**
      * figures out the associated directory for the given page
      * @param page
      * @return page directory or null if none exists
      * <br/>Example:
      * <br/>If the page exists at: /SomeDirectory/My+Page.xml
      * <br/>returns /SomeDirectory/My+Page if that represents an existing directory
      */
     private File getPageDir(Page page) {
         if (page.getFile() == null) { //no file, so no sibling directory
             return null;
         }
         String similarPath = page.getFile().getPath();
         log.debug("similar = " + similarPath);
         Matcher extFinder = extension.matcher(similarPath);
         if (extFinder.lookingAt()) {
             String dirPath = extFinder.group(1);
             log.debug("dirPath = " + dirPath);
             File file = new File(dirPath);
             if (file.exists() && file.isDirectory()) {
                 return file;
             }
         }
         return null;
     }

 }
	package com.atlassian.uwc.converters.jotspot;

	import java.io.File;
	import java.io.FileNotFoundException;
	import java.io.UnsupportedEncodingException;
	import java.net.URLEncoder;
	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.CharacterCodingException;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetEncoder;
	import java.util.Scanner;
	import java.util.Vector;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	import org.apache.log4j.Logger;

	import com.atlassian.uwc.converters.BaseConverter;
	import com.atlassian.uwc.ui.ConfluenceSettingsForm;
	import com.atlassian.uwc.ui.Page;

	/**
	* Prepares jotspot attachments for uploading to Confluence.
	* NOTE: This class was heavily influenced by TwikiPrepareAttachmentFilesConverter
	* @author Laura Kolker
	*/
	public class AttachmentConverter extends BaseConverter {
	private static final String JOTSPOT_IMAGE_DIR = "/_data/";
	private static final String JOTSPOT_IMAGE_DATA = "attach.URI.dat";
	Logger log = Logger.getLogger(this.getClass());

	public void convert(Page page) {
	log.debug("Converting Jotspot Attachments -- starting");
	// scan the page and create a list of attachments
	addAttachmentsToPage(page);
	log.debug("Converting Jotspot Attachments -- complete");

	}

	/**
	* looks for attachments and attaches them
	* @param page object to attach pages to
	*/
	protected void addAttachmentsToPage(Page page) {
	//two types of attachments, images and attachments

	//images
	Vector<String> imagepaths = getImagePaths(page.getOriginalText());
	attach(imagepaths, page);

	//attachments
	Vector<String> attachmentPaths = getAttachmentPaths(page);
	attach(attachmentPaths, page);

	}

	/**
	* attaches given String Vector of paths to page
	* @param paths
	* @param page
	*/
	private void attach(Vector<String> paths, Page page) {

	//foreach path in paths
	for (String path : paths) {
	//get the complete path to the file

	log.debug("complete path = " + path);

	//confirm existance of file
	File file = new File(path);
	if (!file.exists() \|\| file.isDirectory())
	continue;

	//attach the file
	log.debug("adding attachment: " + file.getName());
	page.addAttachment(file);
	}
	}

	String correctPath = "(.*\\/([^/]+)\\" + JOTSPOT_IMAGE_DIR + ")";
	Pattern correctPathPattern = Pattern.compile(correctPath);
	/**
	* changes the jotspot default imagename (which is a .dat) to the
	* actual image name, by examining the path to said image
	* @param path default path
	* <br>For example:
	* /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/attach.URI.dat
	* @return useful path
	* <br/>For example:
	* /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/hobbespounce.gif
	*/
	private String getUsefulImagePath(String path) {
	log.debug("renaming path = " + path);
	Matcher correctPathFinder = correctPathPattern.matcher(path);
	if (correctPathFinder.lookingAt()) {
	String correctName = correctPathFinder.group(2);
	String correctDir = correctPathFinder.group(1);
	//check for ascii char encodings (only encode directory
	//as wiki syntax won't find encoded character filenames)
	correctDir = encodeChars(correctDir);
	path = correctDir + correctName;
	log.debug("correct path = " + path);
	}

	return path;
	}

	Pattern nonfilenameChars = Pattern.compile("[^-\\w+ \\.\\/\\\\]");
	/**
	* checks for non filename safe characters (like , (comma) for example)
	* and encodes thems.
	* @param string For example:
	* thisStringHasA,comma
	* @return encoded nonword characters, For example:
	* thisStringHasA%2Ccomma
	*/
	protected String encodeChars(String string) {
	String encoding = "UTF-8";//FIXME long term, provide property for this?
	Matcher nonwordcharFinder = nonfilenameChars.matcher(string);
	StringBuffer sb = new StringBuffer();
	//find the nonfilename characters
	while (nonwordcharFinder.find()) {
	String badChar = nonwordcharFinder.group();
	String encoded = null;
	//and encode them.
	try {
	encoded = URLEncoder.encode(badChar, encoding);
	} catch (UnsupportedEncodingException e) {
	log.error("Problem with encoding: " + encoding);
	e.printStackTrace();
	}
	nonwordcharFinder.appendReplacement(sb, encoded);
	}
	nonwordcharFinder.appendTail(sb);
	String encodedStr = sb.toString();
	if (encodedStr == null \|\| "".equals(encodedStr))
	encodedStr = string;
	return encodedStr;
	}


	String imgSrc = "<img src=\"([^\"]+)\"[^/]*/>";
	Pattern imgPattern = Pattern.compile(imgSrc);
	/**
	* @param input page text
	* @return list of absolute paths to images
	*/
	private Vector<String> getImagePaths(String input) {
	Vector<String> paths = new Vector<String>();

	//get paths from img src using regex
	Matcher imgFinder = imgPattern.matcher(input);
	String root = this.getAttachmentDirectory();
	while (imgFinder.find()) {
	String path = imgFinder.group(1);
	if (!path.startsWith("\\/")) path = "/" + path;
	path = root + path; //this is actually an image directory !
	String tmpPath = path + JOTSPOT_IMAGE_DIR + JOTSPOT_IMAGE_DATA; //here's where the image is
	String toPath = getUsefulImagePath(tmpPath); //but that's a useless name for the file, so we change it
	tmpPath = encodeChars(tmpPath); //only encode directory, as wiki syntax won't find encoded character filenames
	File tmpFile = new File(tmpPath);
	File toFile = new File(toPath);
	if (!tmpFile.renameTo(toFile)) { //this happens when there's something there already
	log.debug("Can't rename " + tmpFile + " to " + toFile);
	}

	log.debug("getImagePaths path = " + toPath);
	paths.add(toPath);
	}

	return paths;
	}

	/**
	* figures out what attachments are associated with this page.
	* @param page
	* @return String vector of absolute paths to attachments
	*/
	private Vector<String> getAttachmentPaths(Page page) {
	Vector<String> paths = new Vector<String>();

	//get the directory with the same name as this page
	File pageDir = getPageDir(page);
	if (pageDir == null)
	return paths;

	//go down one level
	File files[] = pageDir.listFiles();

	//open up the xml files
	for (File file : files) {
	//check for existence and non-directoryness
	if (!file.exists() \|\| file.isDirectory())
	continue;
	//slurp string
	String fileContents = read(file);
	// log.debug("fileContents = " + fileContents);
	if (isAttachment(fileContents)) {
	String path = getPath(fileContents);
	String root = this.getAttachmentDirectory();
	String basePath = encodeChars(root + path + JOTSPOT_IMAGE_DIR);
	String tmpPath = basePath + JOTSPOT_IMAGE_DATA;
	path = basePath + getFilename(path);
	File tmpFile = new File(tmpPath);
	File toFile = new File(path);
	if (!tmpFile.renameTo(toFile)) { //this happens when there's already a file there
	log.debug("Can't rename " + tmpPath + " to " + path);
	}
	log.debug("attachment path = " + path);
	if (path == null)
	continue;
	paths.add(path);
	}
	}

	return paths;
	}


	/**
	* @param path
	* @return filename for a given filepath.
	* <br/>Example:
	* <br/>path = /Dirpath/myfile.txt
	* <br/>return = myfile.txt
	*/
	private String getFilename(String path) {
	File file = new File(path);
	return file.getName();
	}

	Pattern attachmentPath = Pattern.compile("<node[^>]+?path=\"([^\"]*)\"");
	/**
	* Uses the given file contents to determine the path to the attachment
	* @param fileContents
	* @return
	*/
	private String getPath(String fileContents) {
	log.debug("non image attachment!!!");
	Matcher pathFinder = attachmentPath.matcher(fileContents);
	if (pathFinder.find()) {
	String path = pathFinder.group(1);
	path = path.replaceAll(" ", "+");
	log.debug("path = " + path);
	return path;
	}
	return null;
	}

	Pattern attachmentClue = Pattern.compile("nodeClass=\"attachment\"");
	/**
	* Uses the given file contents to determine if that file represents
	* an attachment
	* @param fileContents
	* @return true if fileContents represents an attachment
	*/
	private boolean isAttachment(String fileContents) {
	Matcher attachmentClueFinder = attachmentClue.matcher(fileContents);
	return attachmentClueFinder.find();
	}

	/**
	* reads the contents of the file
	* @param file file is assumed to be an existing non-directory file
	* @return the contents of the file as a string or null if file could
	* not be found
	*/
	private String read(File file) {

	String string = "";
	try {
	Scanner in = new Scanner(file);
	while (in.hasNext()) {
	string += in.next() + " ";
	}
	} catch (FileNotFoundException e) {
	log.debug("Problem opening file: " + file.getName());
	e.printStackTrace();
	return null;
	}
	return string;
	}

	Pattern extension = Pattern.compile("(.*)\\.\\w+$");
	/**
	* figures out the associated directory for the given page
	* @param page
	* @return page directory or null if none exists
	* <br/>Example:
	* <br/>If the page exists at: /SomeDirectory/My+Page.xml
	* <br/>returns /SomeDirectory/My+Page if that represents an existing directory
	*/
	private File getPageDir(Page page) {
	String similarPath = page.getFile().getPath();
	log.debug("similar = " + similarPath);
	Matcher extFinder = extension.matcher(similarPath);
	if (extFinder.lookingAt()) {
	String dirPath = extFinder.group(1);
	log.debug("dirPath = " + dirPath);
	File file = new File(dirPath);
	if (file.exists() && file.isDirectory()) {
	return file;
	}
	}
	return null;
	}

	}