src/main/java/com/atlassian/uwc/converters/IllegalLinkNameConverter.java - universal-wiki-converter - Git at Google

 package com.atlassian.uwc.converters;

 import java.util.HashSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import com.atlassian.uwc.converters.tikiwiki.RegexUtil;
 import com.atlassian.uwc.converters.twiki.JavaRegexAndTokenizerConverter;
 import com.atlassian.uwc.ui.Page;

 /**
  * @author Laura Kolker
  * handles correcting links to illegal pagenames, that would have been changed
  * to something legal by IllegalPageNameConverter
  */
 public class IllegalLinkNameConverter extends IllegalNameConverter {

 	private static final String PROPKEY_CUSTOMPROTOCOL = "illegalnames-customprotocol";

 	/**
 	 * delimiter for an alias in links
 	 */
 	private static final String ALIAS_DELIM = "|";

 	public static final String ALLOW_AT_IN_LINKS_KEY = "allow-at-in-links";
 	public static final String ALLOW_TILDE_IN_LINKS_KEY = "allow-tilde-in-links";

 	public void convert(Page page) {
 		log.info("Converting Links Referencing Illegal Names - start");

 		tokenizeCodeBlocks(page);
 		//save the tokenized changes
 		page.setOriginalText(page.getConvertedText());

 		String input = page.getOriginalText();
 		String converted = legalizeLinks(input);
 		page.setConvertedText(converted);


 		//save the converted text so we can detokenize
 		page.setOriginalText(converted);
 		detokenizeCodeBlocks(page);

 		log.info("Converting Links Referencing Illegal Names - complete");
 	}

 	String linksPrefix =
 		"(?<=" +		//zero-width group starts
 			"\\[" +		//left bracket
 		")";			//end zero-width group


 	String linksSuffix =
 		"(?=" +			//zero-width group starts
 			"\\]" +		//right bracket
 		")";			//end zero-width group
 	String links =
 		linksPrefix +	//left Bracket
 		"("+			//start capture (group 1)
 			".*?" +		//everything until
 			"[^\\\\]" +	//not a backslash (this would mean our closing bracket was escaped
 		")" +			//end capture (group 1)
 		linksSuffix;	//rightBracket

 	Pattern linkPattern = Pattern.compile(links);

 	String brackets = "(?<=^|[^\\\\])(\\[|\\])";
 	Pattern bracketPattern = Pattern.compile(brackets);

 	String protocol = "((https?://)|(mailto:)|(file:)|(ftp:))(.*)";
 	Pattern protocolPattern = Pattern.compile(protocol);

 	/**
 	 * @param input
 	 * @return true if the link is an external link (http://somepage.com),
 	 * returns false if the link is an internal wiki page
 	 */
 	public boolean isExternalLink(String input) {
 		Matcher protocolFinder;
 		if (this.getProperties().containsKey(PROPKEY_CUSTOMPROTOCOL)) {
 			try {
 				String custom = this.getProperties().getProperty(PROPKEY_CUSTOMPROTOCOL, protocol);
 				Pattern customPattern = Pattern.compile(custom);
 				protocolFinder = customPattern.matcher(input);
 			} catch (RuntimeException e) {
 				log.warn("Problem compiling custom protocol with link: " + input + " - Using default protocol.");
 				protocolFinder = protocolPattern.matcher(input);
 			}
 		}
 		else
 			protocolFinder = protocolPattern.matcher(input);
 		return protocolFinder.lookingAt();
 	}


 	/**
 	 * transforms any links to illegal pagenames to their
 	 * legal counterpart
 	 * @param input
 	 * @return input with legalized links
 	 */
 	protected String legalizeLinks(String input) {
 		String legal = input;

 //		HashSet<String> originalNames = getIllegalPagenames();
 //		if (originalNames != null)
 //			legal = legalizeWithState(legal, originalNames); //XXX this handles the right bracket issue, but is v. slow

 		return legalizeLinksWithoutState(legal);
 	}


 	/**
 	 * uses a list of illegal pagenames that have been found to
 	 * legalize the links
 	 * @param input confluence syntax with links
 	 * @param originalNames unique list of illegal pagenames
 	 * @return
 	 */
 	protected String legalizeWithState(String input, HashSet<String> originalNames) {
 		if (originalNames == null)
 			throw new IllegalArgumentException(
 					"illegal pagenames object must not be null. Use setIllegalPagenames before calling this method.");

 		String contentWithLinks = input;
 		for (String pagename : originalNames) {
 			String replacement = convertIllegalName(pagename);
 			String pagenamePattern = createPagenamePattern(pagename);
 			replacement = "{group1}" + replacement;
 			contentWithLinks = RegexUtil.loopRegex(contentWithLinks, pagenamePattern, replacement);
 		}

 		return contentWithLinks;
 	}

 	/**
 	 * creates a string representing a regex that would find
 	 * a link to the given pagename
 	 * @param pagename
 	 * @return regex for finding links to the given pagename
 	 */
 	private String createPagenamePattern(String pagename) {
 		pagename = Pattern.quote(pagename); //escapes any regex chars
 		String pattern =
 			linksPrefix +  	//left bracket
 			"(" +			//start capture (group1)
 				".*?" +		//anything until
 			")" +			//end capture (group 1)
 			pagename +		//the pagename
 			linksSuffix;	//right bracket
 		return pattern;
 	}

 	/**
 	 * transforms the links in the given input such that
 	 * any links to illegal pages are transformed to their
 	 * legal equivalent
 	 * @param input
 	 * @return input with legal links
 	 */
 	protected String legalizeLinksWithoutState(String input) {
 		//Look for links
 		StringBuffer sb = new StringBuffer();
 		boolean found = false;
 		Matcher linkFinder = linkPattern.matcher(input);
 		while (linkFinder.find()) {
 			if (escaped(input, linkFinder.start(), '\\')) {
 				continue;
 			}
 			found = true;
 			String linkContents = linkFinder.group(1);

 			//link parts: alias, anchor symbol (if any), link
 			String alias = identifyAlias(linkContents);
 			if (alias == null) alias = "";
 			if (!"".equals(alias)) alias += ALIAS_DELIM;
 			String anchor = identifyInPageAnchor(linkContents.replaceFirst("^[^|]+\\|", ""));
 			String link = identifyLink(linkContents);
 			if (isAttachment(link))
 				continue;
 			String otherAnchor = identifyOtherPageAnchor(link);
 			if (!"".equals(otherAnchor)) otherAnchor = "#" + otherAnchor;
 			//if theirs another anchor, remove that from the link string
 			String pagename =  (!"".equals(otherAnchor))
 					?link.substring(0,link.length() - otherAnchor.length())
 					:link;

 			if (!isExternalLink(pagename)) { //if it's external, it can't have an illegal confluence name.
 				String space = "";
 				if (hasSpace(pagename)) {
 					space = identifySpace(pagename);
 					pagename = removeItem(pagename, space);
 				}
 				String blogdate = "";
 				if (isBlogpost(pagename)) {
 					blogdate = identifyBlogdate(pagename);
 					pagename = removeItem(pagename, blogdate);
 				}
 				//important for syntax like shortcut links
 				this.setAllowAt(allowsAt());
 				this.setAllowTilde(allowsTilde());
 				pagename = convertIllegalName(pagename); //get rid of the illegal chars here.
 				//rebuild with parts that were put aside
 				pagename = space + blogdate + pagename;
 			}

 			String replacement = alias + anchor + pagename + otherAnchor;
 			replacement = RegexUtil.handleEscapesInReplacement(replacement);
 			linkFinder.appendReplacement(sb, replacement);
 		}
 		if (found) {
 			linkFinder.appendTail(sb);
 			return sb.toString();
 		}
 		return input;
 	}


 	private boolean allowsAt() {
 		if (this.properties == null) return false;
 		if (this.properties.containsKey(ALLOW_AT_IN_LINKS_KEY)) {
 			String val = (String) this.properties.get(ALLOW_AT_IN_LINKS_KEY);
 			if ("true".equals(val))
 				return true;
 		}
 		return false;
 	}

 	private boolean allowsTilde() {
 		if (this.properties == null) return false;
 		if (this.properties.containsKey(ALLOW_TILDE_IN_LINKS_KEY)) {
 			String val = (String) this.properties.get(ALLOW_TILDE_IN_LINKS_KEY);
 			if ("true".equals(val))
 				return true;
 		}
 		return false;
 	}

 	protected boolean hasSpace(String pagename) {
 		return pagename.contains(":");
 	}

 	Pattern space = Pattern.compile("" +
 			"^([^:]+:)");
 	protected String identifySpace(String pagename) {
 		Matcher spaceFinder = space.matcher(pagename);
 		if (spaceFinder.find()) {
 			return spaceFinder.group(1);
 		}
 		return "";
 	}

 	protected String removeItem(String pagename, String item) {
 		item = "\\Q" + item + "\\E";
 		return RegexUtil.loopRegex(pagename, item, "");
 	}

 	Pattern blogdate = Pattern.compile("^\\/\\d{4,4}\\/\\d{2,2}\\/\\d{2,2}\\/");
 	protected boolean isBlogpost(String pagename) {
 		return blogdate.matcher(pagename).lookingAt();
 	}

 	protected String identifyBlogdate(String pagename) {
 		Matcher blogFinder = blogdate.matcher(pagename);
 		if (blogFinder.find()) {
 			return blogFinder.group();
 		}
 		return "";
 	}

 	/**
 	 * @param input string that might have an escaped character
 	 * @param index index of character that might be escaped
 	 * @param ch character used to escape the character at the given index (probably backslash)
 	 * @return true if the character at the given index of the input string is escaped
 	 * by the given character
 	 */
 	protected boolean escaped(String input, int index, char ch) {
 		if (index == 0 || index == 1)
 			return false;
 		if (input.charAt(index-2) == ch) return true;
 		return false;
 	}

 	String alias =
 		"([^|]+)" +	// (group 1) not a pipe until the end or
 		"(\\|.*)?"; // (group 2) optional pipe and then anything until the end
 	Pattern aliasPattern = Pattern.compile(alias);
 	/**
 	 * figures out the alias content of a confluence link
 	 * @param input the link (without brackets). So for example:<br/>
 	 * If the Confluence syntax for a link was "[alias|Page Name]", then the input
 	 * you would pass would be "alias|Page Name".
 	 * @return the alias, for the above example, the return value would be "alias";
 	 */
 	protected String identifyAlias(String input) {
 		Matcher aliasFinder = aliasPattern.matcher(input);
 		if (aliasFinder.find()) {
 			if (aliasFinder.group(2) == null)
 				return ""; //no alias
 			return aliasFinder.group(1);
 		}
 		return input;
 	}

 	String anchor =
 		"(?:^|\\|)" + //the beginning of the string or a pipe
 		"(#)";		  //a hash
 	Pattern anchorPattern = Pattern.compile(anchor);
 	/**
 	 * examines a link, and returns a #, if the link
 	 * has an anchor to a section within this page. For example:<br/>
 	 * If a confluence link was like so: "[alias|#anchor]",
 	 * then the input would be "alias|#anchor",
 	 * and the return value would be #.
 	 * However, as only in-page-anchors are found  by this method,
 	 * an empty string would be returned if the input was:
 	 * alias|OtherPage#anchor
 	 * @param input The contents of the link.
 	 * @return A hash (#) symbol, if such an anchor
 	 * exists in the given input, or an empty string (""),
 	 * if no such anchor exists.
 	 */
 	protected String identifyInPageAnchor(String input) {
 		Matcher anchorFinder = anchorPattern.matcher(input);
 		if (anchorFinder.find()) {
 			return "#";
 		}
 		return "";
 	}

 	String otherAnchor =
 		"^" +			//beginning of string
 		"[^#]+" +		//not a hash
 		"#" +			//one hash
 		"(" +			//start capture (group1)
 			".*" +		//everything til the end
 		")";			//end capture (group1)
 	Pattern otherAnchorPattern = Pattern.compile(otherAnchor);
 	/**
 	 * examines a link, and if it finds a anchor for another page
 	 * it returns the anchor. For example:
 	 * If a confluence link was "[alias|OtherPage#anchor]",
 	 * then the input would need to be "alias|OtherPage#anchor",
 	 * and the return value would be "#anchor".
 	 * @param input The contents of a confluence link (minus the enclosing brackets)
 	 * @return the anchor
 	 */
 	protected String identifyOtherPageAnchor(String input) {
 		Matcher anchorFinder = otherAnchorPattern.matcher(input);
 		if (anchorFinder.find()) {
 			return anchorFinder.group(1);
 		}
 		return "";
 	}

 	String linkContent =
 		"[^#|]" + //not a hash or a pipe
 		"[^|]*" + //not a pipe until
 		"$";	  //the end of the string
 	Pattern linkContentPattern = Pattern.compile(linkContent);
 	/**
 	 * gets the link to the page, for a given confluence link.
 	 * For Example<br/>
 	 * If a Confluence link was "[alias|page#anchor]",
 	 * The input would be alias|page#anchor, and the return value would be
 	 * page#anchor.
 	 * <br/>
 	 * If a Confluence link was "[#anchor]",
 	 * then the input would be "#anchor", and the return value would be "anchor".
 	 * If a Confluence link was "[^attachment1.gif]", the input would be
 	 * "^attachment1.gif, and the return value would be "^attachment.gif"
 	 * @param input confluence link minus the brackets
 	 * @return the page name the link is for
 	 */
 	protected String identifyLink(String input) {
 		Matcher linkFinder = linkContentPattern.matcher(input);
 		if (linkFinder.find()) {
 			return linkFinder.group();
 		}
 		return input;
 	}

 	/**
 	 * @param input a confluence link, minus such extraneous details
 	 * as aliases, and the anchors of other pagenames
 	 * For example,<br/>
 	 * if input = "^attachment.gif", then it return true.
 	 * if input = "pagename", then it returns false.
 	 * @return true if the link references an attachment
 	 */
 	protected boolean isAttachment(String input) {
 		return input.contains("^");
 	}


 	String codeblockTokenizerConverterString =
 			"(" +				//start capture (group 1)
 				"\\{" +			//a left brace
 					"code" +	//the string "code"
 					"[^}]*" +	//anything but a right brace until
 				"\\}" +			//a right brace
 				"(" +			//start capture (group 2)
 					".*?" +		//anything except a newline until
 				")" +			//end capture (group 2)
 				"\\{" +			//a left brace
 					"code" +	//the string "code"
 				"\\}" +			//a right brace
 			")" +				//end capture (group 1)
 			JavaRegexAndTokenizerConverter.REGEX_SEPERATOR_MULTI_LINE + //converter regex replacement trigger
 			"$1";				//replacement
 	String noformatblockTokenizerConverterString =
 		"(" +				//start capture (group 1)
 			"\\{" +			//a left brace
 				"noformat" +	//the string "noformat"
 				"[^}]*" +	//anything but a right brace until
 			"\\}" +			//a right brace
 			"(" +			//start capture (group 2)
 				".*?" +		//anything except a newline until
 			")" +			//end capture (group 2)
 			"\\{" +			//a left brace
 				"noformat" +	//the string "noformat"
 			"\\}" +			//a right brace
 		")" +				//end capture (group 1)
 		JavaRegexAndTokenizerConverter.REGEX_SEPERATOR_MULTI_LINE + //converter regex replacement trigger
 		"$1";				//replacement

 	/**
 	 * tokenizes any instances of code blocks, so that
 	 * the contents of a code block is not affected by this class
 	 * @param page page with code blocks to tokenize
 	 * @return page with tokenzied code blocks
 	 */
 	protected Page tokenizeCodeBlocks(Page page) {
 		JavaRegexAndTokenizerConverter codeTokenizer =
 			(JavaRegexAndTokenizerConverter) JavaRegexAndTokenizerConverter.getConverter(
 				this.codeblockTokenizerConverterString);
 		codeTokenizer.convert(page);
 		page.setOriginalText(page.getConvertedText());
 		JavaRegexAndTokenizerConverter noformatTokenizer =
 			(JavaRegexAndTokenizerConverter) JavaRegexAndTokenizerConverter.getConverter(
 				this.noformatblockTokenizerConverterString);
 		noformatTokenizer.convert(page);
 		return page;
 	}
 	/**
 	 * detokenizes any code block tokens.
 	 * @param page page to detokenize
 	 * @return detokenized page
 	 */
 	protected Page detokenizeCodeBlocks(Page page) {
 		DetokenizerConverter detokenizer =
 			new DetokenizerConverter();
 		detokenizer.convert(page);
 		return page;
 	}
 }
	package com.atlassian.uwc.converters;

	import java.util.HashSet;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	import com.atlassian.uwc.converters.tikiwiki.RegexUtil;
	import com.atlassian.uwc.converters.twiki.JavaRegexAndTokenizerConverter;
	import com.atlassian.uwc.ui.Page;

	/**
	* @author Laura Kolker
	* handles correcting links to illegal pagenames, that would have been changed
	* to something legal by IllegalPageNameConverter
	*/
	public class IllegalLinkNameConverter extends IllegalNameConverter {

	private static final String PROPKEY_CUSTOMPROTOCOL = "illegalnames-customprotocol";

	/**
	* delimiter for an alias in links
	*/
	private static final String ALIAS_DELIM = "\|";

	public static final String ALLOW_AT_IN_LINKS_KEY = "allow-at-in-links";
	public static final String ALLOW_TILDE_IN_LINKS_KEY = "allow-tilde-in-links";

	public void convert(Page page) {
	log.info("Converting Links Referencing Illegal Names - start");

	tokenizeCodeBlocks(page);
	//save the tokenized changes
	page.setOriginalText(page.getConvertedText());

	String input = page.getOriginalText();
	String converted = legalizeLinks(input);
	page.setConvertedText(converted);


	//save the converted text so we can detokenize
	page.setOriginalText(converted);
	detokenizeCodeBlocks(page);

	log.info("Converting Links Referencing Illegal Names - complete");
	}

	String linksPrefix =
	"(?<=" + //zero-width group starts
	"\\[" + //left bracket
	")"; //end zero-width group


	String linksSuffix =
	"(?=" + //zero-width group starts
	"\\]" + //right bracket
	")"; //end zero-width group
	String links =
	linksPrefix + //left Bracket
	"("+ //start capture (group 1)
	".*?" + //everything until
	"[^\\\\]" + //not a backslash (this would mean our closing bracket was escaped
	")" + //end capture (group 1)
	linksSuffix; //rightBracket

	Pattern linkPattern = Pattern.compile(links);

	String brackets = "(?<=^\|[^\\\\])(\\[\|\\])";
	Pattern bracketPattern = Pattern.compile(brackets);

	String protocol = "((https?://)\|(mailto:)\|(file:)\|(ftp:))(.*)";
	Pattern protocolPattern = Pattern.compile(protocol);

	/**
	* @param input
	* @return true if the link is an external link (http://somepage.com),
	* returns false if the link is an internal wiki page
	*/
	public boolean isExternalLink(String input) {
	Matcher protocolFinder;
	if (this.getProperties().containsKey(PROPKEY_CUSTOMPROTOCOL)) {
	try {
	String custom = this.getProperties().getProperty(PROPKEY_CUSTOMPROTOCOL, protocol);
	Pattern customPattern = Pattern.compile(custom);
	protocolFinder = customPattern.matcher(input);
	} catch (RuntimeException e) {
	log.warn("Problem compiling custom protocol with link: " + input + " - Using default protocol.");
	protocolFinder = protocolPattern.matcher(input);
	}
	}
	else
	protocolFinder = protocolPattern.matcher(input);
	return protocolFinder.lookingAt();
	}


	/**
	* transforms any links to illegal pagenames to their
	* legal counterpart
	* @param input
	* @return input with legalized links
	*/
	protected String legalizeLinks(String input) {
	String legal = input;

	// HashSet<String> originalNames = getIllegalPagenames();
	// if (originalNames != null)
	// legal = legalizeWithState(legal, originalNames); //XXX this handles the right bracket issue, but is v. slow

	return legalizeLinksWithoutState(legal);
	}



	/**
	* uses a list of illegal pagenames that have been found to
	* legalize the links
	* @param input confluence syntax with links
	* @param originalNames unique list of illegal pagenames
	* @return
	*/
	protected String legalizeWithState(String input, HashSet<String> originalNames) {
	if (originalNames == null)
	throw new IllegalArgumentException(
	"illegal pagenames object must not be null. Use setIllegalPagenames before calling this method.");

	String contentWithLinks = input;
	for (String pagename : originalNames) {
	String replacement = convertIllegalName(pagename);
	String pagenamePattern = createPagenamePattern(pagename);
	replacement = "{group1}" + replacement;
	contentWithLinks = RegexUtil.loopRegex(contentWithLinks, pagenamePattern, replacement);
	}

	return contentWithLinks;
	}

	/**
	* creates a string representing a regex that would find
	* a link to the given pagename
	* @param pagename
	* @return regex for finding links to the given pagename
	*/
	private String createPagenamePattern(String pagename) {
	pagename = Pattern.quote(pagename); //escapes any regex chars
	String pattern =
	linksPrefix + //left bracket
	"(" + //start capture (group1)
	".*?" + //anything until
	")" + //end capture (group 1)
	pagename + //the pagename
	linksSuffix; //right bracket
	return pattern;
	}

	/**
	* transforms the links in the given input such that
	* any links to illegal pages are transformed to their
	* legal equivalent
	* @param input
	* @return input with legal links
	*/
	protected String legalizeLinksWithoutState(String input) {
	//Look for links
	StringBuffer sb = new StringBuffer();
	boolean found = false;
	Matcher linkFinder = linkPattern.matcher(input);
	while (linkFinder.find()) {
	if (escaped(input, linkFinder.start(), '\\')) {
	continue;
	}
	found = true;
	String linkContents = linkFinder.group(1);

	//link parts: alias, anchor symbol (if any), link
	String alias = identifyAlias(linkContents);
	if (alias == null) alias = "";
	if (!"".equals(alias)) alias += ALIAS_DELIM;
	String anchor = identifyInPageAnchor(linkContents.replaceFirst("^[^\|]+\\\|", ""));
	String link = identifyLink(linkContents);
	if (isAttachment(link))
	continue;
	String otherAnchor = identifyOtherPageAnchor(link);
	if (!"".equals(otherAnchor)) otherAnchor = "#" + otherAnchor;
	//if theirs another anchor, remove that from the link string
	String pagename = (!"".equals(otherAnchor))
	?link.substring(0,link.length() - otherAnchor.length())
	:link;

	if (!isExternalLink(pagename)) { //if it's external, it can't have an illegal confluence name.
	String space = "";
	if (hasSpace(pagename)) {
	space = identifySpace(pagename);
	pagename = removeItem(pagename, space);
	}
	String blogdate = "";
	if (isBlogpost(pagename)) {
	blogdate = identifyBlogdate(pagename);
	pagename = removeItem(pagename, blogdate);
	}
	//important for syntax like shortcut links
	this.setAllowAt(allowsAt());
	this.setAllowTilde(allowsTilde());
	pagename = convertIllegalName(pagename); //get rid of the illegal chars here.
	//rebuild with parts that were put aside
	pagename = space + blogdate + pagename;
	}

	String replacement = alias + anchor + pagename + otherAnchor;
	replacement = RegexUtil.handleEscapesInReplacement(replacement);
	linkFinder.appendReplacement(sb, replacement);
	}
	if (found) {
	linkFinder.appendTail(sb);
	return sb.toString();
	}
	return input;
	}


	private boolean allowsAt() {
	if (this.properties == null) return false;
	if (this.properties.containsKey(ALLOW_AT_IN_LINKS_KEY)) {
	String val = (String) this.properties.get(ALLOW_AT_IN_LINKS_KEY);
	if ("true".equals(val))
	return true;
	}
	return false;
	}

	private boolean allowsTilde() {
	if (this.properties == null) return false;
	if (this.properties.containsKey(ALLOW_TILDE_IN_LINKS_KEY)) {
	String val = (String) this.properties.get(ALLOW_TILDE_IN_LINKS_KEY);
	if ("true".equals(val))
	return true;
	}
	return false;
	}

	protected boolean hasSpace(String pagename) {
	return pagename.contains(":");
	}

	Pattern space = Pattern.compile("" +
	"^([^:]+:)");
	protected String identifySpace(String pagename) {
	Matcher spaceFinder = space.matcher(pagename);
	if (spaceFinder.find()) {
	return spaceFinder.group(1);
	}
	return "";
	}

	protected String removeItem(String pagename, String item) {
	item = "\\Q" + item + "\\E";
	return RegexUtil.loopRegex(pagename, item, "");
	}

	Pattern blogdate = Pattern.compile("^\\/\\d{4,4}\\/\\d{2,2}\\/\\d{2,2}\\/");
	protected boolean isBlogpost(String pagename) {
	return blogdate.matcher(pagename).lookingAt();
	}

	protected String identifyBlogdate(String pagename) {
	Matcher blogFinder = blogdate.matcher(pagename);
	if (blogFinder.find()) {
	return blogFinder.group();
	}
	return "";
	}

	/**
	* @param input string that might have an escaped character
	* @param index index of character that might be escaped
	* @param ch character used to escape the character at the given index (probably backslash)
	* @return true if the character at the given index of the input string is escaped
	* by the given character
	*/
	protected boolean escaped(String input, int index, char ch) {
	if (index == 0 \|\| index == 1)
	return false;
	if (input.charAt(index-2) == ch) return true;
	return false;
	}

	String alias =
	"([^\|]+)" + // (group 1) not a pipe until the end or
	"(\\\|.*)?"; // (group 2) optional pipe and then anything until the end
	Pattern aliasPattern = Pattern.compile(alias);
	/**
	* figures out the alias content of a confluence link
	* @param input the link (without brackets). So for example:<br/>
	* If the Confluence syntax for a link was "[alias\|Page Name]", then the input
	* you would pass would be "alias\|Page Name".
	* @return the alias, for the above example, the return value would be "alias";
	*/
	protected String identifyAlias(String input) {
	Matcher aliasFinder = aliasPattern.matcher(input);
	if (aliasFinder.find()) {
	if (aliasFinder.group(2) == null)
	return ""; //no alias
	return aliasFinder.group(1);
	}
	return input;
	}

	String anchor =
	"(?:^\|\\\|)" + //the beginning of the string or a pipe
	"(#)"; //a hash
	Pattern anchorPattern = Pattern.compile(anchor);
	/**
	* examines a link, and returns a #, if the link
	* has an anchor to a section within this page. For example:<br/>
	* If a confluence link was like so: "[alias\|#anchor]",
	* then the input would be "alias\|#anchor",
	* and the return value would be #.
	* However, as only in-page-anchors are found by this method,
	* an empty string would be returned if the input was:
	* alias\|OtherPage#anchor
	* @param input The contents of the link.
	* @return A hash (#) symbol, if such an anchor
	* exists in the given input, or an empty string (""),
	* if no such anchor exists.
	*/
	protected String identifyInPageAnchor(String input) {
	Matcher anchorFinder = anchorPattern.matcher(input);
	if (anchorFinder.find()) {
	return "#";
	}
	return "";
	}

	String otherAnchor =
	"^" + //beginning of string
	"[^#]+" + //not a hash
	"#" + //one hash
	"(" + //start capture (group1)
	".*" + //everything til the end
	")"; //end capture (group1)
	Pattern otherAnchorPattern = Pattern.compile(otherAnchor);
	/**
	* examines a link, and if it finds a anchor for another page
	* it returns the anchor. For example:
	* If a confluence link was "[alias\|OtherPage#anchor]",
	* then the input would need to be "alias\|OtherPage#anchor",
	* and the return value would be "#anchor".
	* @param input The contents of a confluence link (minus the enclosing brackets)
	* @return the anchor
	*/
	protected String identifyOtherPageAnchor(String input) {
	Matcher anchorFinder = otherAnchorPattern.matcher(input);
	if (anchorFinder.find()) {
	return anchorFinder.group(1);
	}
	return "";
	}

	String linkContent =
	"[^#\|]" + //not a hash or a pipe
	"[^\|]*" + //not a pipe until
	"$"; //the end of the string
	Pattern linkContentPattern = Pattern.compile(linkContent);
	/**
	* gets the link to the page, for a given confluence link.
	* For Example<br/>
	* If a Confluence link was "[alias\|page#anchor]",
	* The input would be alias\|page#anchor, and the return value would be
	* page#anchor.
	* <br/>
	* If a Confluence link was "[#anchor]",
	* then the input would be "#anchor", and the return value would be "anchor".
	* If a Confluence link was "[^attachment1.gif]", the input would be
	* "^attachment1.gif, and the return value would be "^attachment.gif"
	* @param input confluence link minus the brackets
	* @return the page name the link is for
	*/
	protected String identifyLink(String input) {
	Matcher linkFinder = linkContentPattern.matcher(input);
	if (linkFinder.find()) {
	return linkFinder.group();
	}
	return input;
	}

	/**
	* @param input a confluence link, minus such extraneous details
	* as aliases, and the anchors of other pagenames
	* For example,<br/>
	* if input = "^attachment.gif", then it return true.
	* if input = "pagename", then it returns false.
	* @return true if the link references an attachment
	*/
	protected boolean isAttachment(String input) {
	return input.contains("^");
	}



	String codeblockTokenizerConverterString =
	"(" + //start capture (group 1)
	"\\{" + //a left brace
	"code" + //the string "code"
	"[^}]*" + //anything but a right brace until
	"\\}" + //a right brace
	"(" + //start capture (group 2)
	".*?" + //anything except a newline until
	")" + //end capture (group 2)
	"\\{" + //a left brace
	"code" + //the string "code"
	"\\}" + //a right brace
	")" + //end capture (group 1)
	JavaRegexAndTokenizerConverter.REGEX_SEPERATOR_MULTI_LINE + //converter regex replacement trigger
	"$1"; //replacement
	String noformatblockTokenizerConverterString =
	"(" + //start capture (group 1)
	"\\{" + //a left brace
	"noformat" + //the string "noformat"
	"[^}]*" + //anything but a right brace until
	"\\}" + //a right brace
	"(" + //start capture (group 2)
	".*?" + //anything except a newline until
	")" + //end capture (group 2)
	"\\{" + //a left brace
	"noformat" + //the string "noformat"
	"\\}" + //a right brace
	")" + //end capture (group 1)
	JavaRegexAndTokenizerConverter.REGEX_SEPERATOR_MULTI_LINE + //converter regex replacement trigger
	"$1"; //replacement

	/**
	* tokenizes any instances of code blocks, so that
	* the contents of a code block is not affected by this class
	* @param page page with code blocks to tokenize
	* @return page with tokenzied code blocks
	*/
	protected Page tokenizeCodeBlocks(Page page) {
	JavaRegexAndTokenizerConverter codeTokenizer =
	(JavaRegexAndTokenizerConverter) JavaRegexAndTokenizerConverter.getConverter(
	this.codeblockTokenizerConverterString);
	codeTokenizer.convert(page);
	page.setOriginalText(page.getConvertedText());
	JavaRegexAndTokenizerConverter noformatTokenizer =
	(JavaRegexAndTokenizerConverter) JavaRegexAndTokenizerConverter.getConverter(
	this.noformatblockTokenizerConverterString);
	noformatTokenizer.convert(page);
	return page;
	}
	/**
	* detokenizes any code block tokens.
	* @param page page to detokenize
	* @return detokenized page
	*/
	protected Page detokenizeCodeBlocks(Page page) {
	DetokenizerConverter detokenizer =
	new DetokenizerConverter();
	detokenizer.convert(page);
	return page;
	}
	}