src/main/java/com/atlassian/uwc/converters/IllegalNameConverter.java - universal-wiki-converter - Git at Google

 package com.atlassian.uwc.converters;

 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Properties;
 import java.util.TreeMap;
 import java.util.Vector;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;

 import org.apache.commons.httpclient.URIException;
 import org.apache.commons.httpclient.util.URIUtil;

 import com.atlassian.uwc.converters.IllegalChar.Type;
 import com.atlassian.uwc.util.PropertyFileManager;


 /**
  * @author Laura Kolker
  * parent class for IllegalLinkNameConverter and IllegalPageNameConverter.
  * It can be used to find illegal page names, and help translate them
  * to legal equivalents.
  */
 public abstract class IllegalNameConverter extends BaseConverter {

 	private static final String URLDECODE_KEY = "illegalnames-urldecode";
 	private static final String DEFAULT_REPLACEMENT = "_";
 	private static final String ILLEGALCHAR_PROP_SUFFIX = ".replacement";
 	private static final String ILLEGALCHAR_PROP_PREFIX = "illegalchar.";
 	private static final String ILLEGALSTART_PROP_PREFIX = "illegalstart.";
 	private static final String CONFIG_SETTINGS_FILE = "settings.illegalcharmap.properties";
 	private static final String PROP_DIR = "conf";
 	private static final String FILE_SEP = System.getProperty("file.separator");

 	private String[] requiredIllegalChars = {
 			"colon",
 			"semicolon",
 			"lessthan",
 			"greaterthan",
 			"at",
 			"forwardslash",
 			"backslash",
 			"pipe",
 			"hash",
 			"leftbracket",
 			"rightbracket",
 			"leftcurlybrace",
 			"rightcurlybrace",
 			"carat",
 	};
 	private String[] requiredIllegalStartChars = {
 			"dollar",
 			"twodots", //..
 			"tilde",
 	};

 	private HashMap<String,String> illegalKeyValues = null;

 	private HashSet<String> illegalPagenames = null;

 	/**
 	 * if true, the @ character will be allowed to remain.
 	 * This is useful for links that might have the same syntax
 	 * as Confluence's shortcut links.
 	 * Currently the only way to set this is to put the following property
 	 * in the converter properties file:
 	 * wiki.xxxx.allow-at-in-links.property=true
 	 * That property will only affect links. It will not affect pagenames
 	 */
 	private boolean allowAt = false;
 	/**
 	 * if true, the ~ character will be allowed to remain.
 	 * This is useful for converting to user profile links.
 	 * Currently the only way to set this is to put the following property
 	 * in the converter properties file:
 	 * wiki.xxxx.allow-tilde-in-links.property=true
 	 * That property will only affect links. It will not affect pagenames
 	 */
 	private boolean allowTilde = false;

 	/* public methods */
 	public IllegalNameConverter() {
 		buildIllegalKeyValueMap();
 	}

 	/**
 	 * looks to see if the given input is illegal
 	 * and returns that input with legal equivalents
 	 * @param input the input to examine for illegal characters
 	 * @return input with legal equivalent
 	 */
 	public String convertIllegalName(String input) {
 		//Create map of replacements
 		List<IllegalChar> illegalChars = getIllegalCharObjects();
 		//Look for illegal characters
 		String legal = searchAndReplaceIllegalChars(input, illegalChars);
 		//replace illegal characters using replacement map
 		return legal;
 	}

 	/* private or protected - Helper methods */

 	/**
 	 * creates the key-value map of illegal characters
 	 */
 	private void buildIllegalKeyValueMap() {
 		illegalKeyValues = new HashMap<String, String>();
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "colon" + ILLEGALCHAR_PROP_SUFFIX, ":");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "semicolon" + ILLEGALCHAR_PROP_SUFFIX, ";");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "lessthan" + ILLEGALCHAR_PROP_SUFFIX, "<");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "greaterthan" + ILLEGALCHAR_PROP_SUFFIX, ">");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "at" + ILLEGALCHAR_PROP_SUFFIX, "@");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "forwardslash" + ILLEGALCHAR_PROP_SUFFIX, "/");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "backslash" + ILLEGALCHAR_PROP_SUFFIX, "\\");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "pipe" + ILLEGALCHAR_PROP_SUFFIX, "|");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "hash" + ILLEGALCHAR_PROP_SUFFIX, "#");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "leftbracket" + ILLEGALCHAR_PROP_SUFFIX, "[");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "rightbracket" + ILLEGALCHAR_PROP_SUFFIX, "]");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "leftcurlybrace" + ILLEGALCHAR_PROP_SUFFIX, "{");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "rightcurlybrace" + ILLEGALCHAR_PROP_SUFFIX, "}");
 		illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "carat" + ILLEGALCHAR_PROP_SUFFIX, "^");
 		illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "dollar" + ILLEGALCHAR_PROP_SUFFIX, "$");
 		illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "twodots" + ILLEGALCHAR_PROP_SUFFIX, "..");
 		illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "tilde" + ILLEGALCHAR_PROP_SUFFIX, "~");

 	}

 	/**
 	 * @return List of IllegalChar objects representing all of the
 	 * illegal pagename characters that Confluence forbids.
 	 */
 	protected List<IllegalChar> getIllegalCharObjects() {
 		List<IllegalChar> illegalChars = new Vector<IllegalChar>();

 		TreeMap<String, String> properties = getIllegalCharProperties();

 		illegalChars.addAll(getIllegalAnywhere(properties, requiredIllegalChars));
 		illegalChars.addAll(getIllegalStarting(properties, requiredIllegalStartChars));

 		return illegalChars;
 	}

 	/**
 	 * @return a map of the illegalchar properties from the
 	 * settings.illegalcharmap.properties file
 	 */
 	protected TreeMap<String, String> getIllegalCharProperties() {
 		String propLocation = PROP_DIR + FILE_SEP + CONFIG_SETTINGS_FILE;
 		TreeMap<String, String> properties = null;
 		try {
 			properties = PropertyFileManager.loadPropertiesFile(propLocation);
 		} catch (IOException e) {
 			log.error("Problem loading properties file: " + propLocation);
 			e.printStackTrace();
 		}
 		return properties;
 	}

 	/**
 	 * @param properties
 	 * @param required
 	 * @return
 	 */
 	protected Vector<IllegalChar> getIllegalAnywhere(
 			TreeMap<String, String> properties, String[] required) {
 		Vector<IllegalChar> illegals = new Vector<IllegalChar>();

 		for (int i = 0; i < required.length; i++) {
 			String baseKey = required[i];
 			if (this.allowAt && "at".equals(baseKey)) continue;
 			String key = ILLEGALCHAR_PROP_PREFIX +
 					baseKey +
 					ILLEGALCHAR_PROP_SUFFIX;
 			IllegalChar illegal =
 				createIllegalChar(properties, key, IllegalChar.Type.ANYWHERE);
 			illegals.add(illegal);
 		}

 		return illegals;
 	}

 	/**
 	 * creates an IllegalChar object for the given key and type,
 	 * using the give properties
 	 * @param properties customized properties from the settings.illegalcharmap.properties
 	 * @param key a key that is referenced in the given properties
 	 * @param type expected IllegalChar type (ANYWHERE, or START_ONLY)
 	 * @return an IllegalChar object
 	 */
 	protected IllegalChar createIllegalChar(
 			TreeMap<String, String> properties, String key, IllegalChar.Type type) {
 		String replacement = null;

 		if (properties.containsKey(key)) {
 			replacement = properties.get(key);
 			replacement = (isLegalReplacement(replacement, type))?
 					replacement:
 					getDefaultReplacement();
 		}
 		else {
 			replacement = getDefaultReplacement();
 		}

 		String value = getIllegalCharValue(key);
 		IllegalChar illegal =
 			new IllegalChar(value, replacement, type);

 		return illegal;
 	}

 	private String illegalChars = " : ; < > @ / \\ | # [ ] { } ^ ";
 	private String illegalStart = " $ .. ~ ";
 	/**
 	 * determins if a candidate replacement for the given IllegalChar.Type
 	 * is a legal replacement
 	 * @param replacement candidate replacement
 	 * @param type ANYWHERE or START_ONLY
 	 * @return true if the replacement is legal
 	 */
 	protected boolean isLegalReplacement(String replacement, Type type) {
 		Pattern oneCharPattern = getReplacementPattern(replacement);
 		String input = (type == IllegalChar.Type.START_ONLY)?illegalStart:illegalChars;
 		Matcher illegalFinder = oneCharPattern.matcher(input);
 		boolean notFound = ! (illegalFinder.find());
 		return notFound;
 	}

 	/**
 	 * creates a regex Pattern for finding the given replacement in a string
 	 * @param a char or series of chars that could be used as a replacement
 	 * for some IllegalChar
 	 * @return Pattern with the given replacement character seperated by spaces.
 	 * It will be useful for comparing a candidate replacment against
 	 * strings illegalChars and illegalStart
 	 */
 	private Pattern getReplacementPattern(String replacement) {
 		String delim = "\\";
 		String oneChar =
 				" " +			//a space
 				delim + 		//escape the next char
 				replacement +
 				" ";			//a space
 		Pattern oneCharPattern = null;
 		try {
 			oneCharPattern = Pattern.compile(oneChar);
 		} catch (PatternSyntaxException e ) { //no backslash necessary
 			oneChar = " " + replacement + " "; //try without delim
 			oneCharPattern = Pattern.compile(oneChar);
 		}
 		return oneCharPattern;
 	}

 	/**
 	 * @return the default replacement char. (Used if
 	 * a customized replacement char is illegal itsel.f)
 	 */
 	protected String getDefaultReplacement() {
 		return DEFAULT_REPLACEMENT;
 	}

 	/**
 	 * @param key gets the current character value for
 	 * a given illegalchar key.
 	 * Example: input = illegalchar.colon.replacement
 	 * return value = ":", or return the key, if no such value exists
 	 * @return value for a given IllegalChar, given a key
 	 */
 	protected String getIllegalCharValue(String key) {
 		String val = this.illegalKeyValues.get(key);
 		val = (val == null)?key:val;
 		val = ("".equals(val))?key:val;
 		return val;
 	}

 	/**
 	 * creates a Vector of IllegalChar objects which describe
 	 * those of the START_ONLY type.
 	 * @param properties key-value paired properteis from settings.illegalcharmap.properties
 	 * @param required array of base keys ("colon", "greaterthan", etc.)
 	 * @return Vector of IllegalChar objects
 	 */
 	protected Vector<IllegalChar> getIllegalStarting(
 			TreeMap<String, String> properties, String[] required) {
 		Vector<IllegalChar> illegals = new Vector<IllegalChar>();

 		for (int i = 0; i < required.length; i++) {
 			String baseKey = required[i];
 			if (this.allowTilde && "tilde".equals(baseKey)) continue;
 			String key = ILLEGALSTART_PROP_PREFIX +
 					baseKey +
 					ILLEGALCHAR_PROP_SUFFIX;
 			IllegalChar illegal =
 				createIllegalChar(properties, key, IllegalChar.Type.START_ONLY);
 			illegals.add(illegal);
 		}

 		return illegals;
 	}

 	/**
 	 * Examines the given input, decodes and URL entities, and replaces each instance of
 	 * and illegal character with its designated replacement
 	 * @param input the given input to be searched
 	 * @param illegalChars a list of IllegalChar objects representing the chars that must
 	 * be replaced
 	 * @return the input with all of the proper replacements
 	 */
 	protected String searchAndReplaceIllegalChars(String input, List<IllegalChar> illegalChars) {
 		String legal = input;
 		if (shouldUrlDecode()) {
 			log.debug("undecoded filename = " + legal); //DELETE
 			legal = decodeUrl(legal);
 			log.debug("decoded filename = " + legal);
 		}
 		for (IllegalChar illegal : illegalChars) {
 			legal = illegal.getReplacement(legal);
 		}
 		return legal;
 	}

 	private boolean shouldUrlDecode() {
 		Properties props = getProperties();
 		String setting = (String) props.get(URLDECODE_KEY);
 		if (setting == null) return false;
 		if (setting.equals("true")) return true;
 		return false;
 	}

 	/**
 	 * decodes any URL entities in the given input
 	 * @param input
 	 * @return input with any URL entities decoded.
 	 */
 	public String decodeUrl(String input) {
 		String encoding = "utf-8";
 		if (input == null) {
 			log.info("decodeUrl: input is null.");
 			return input;
 		}
 		try {
 			/* we have to use the apache decoder,
 			 * as URLDecoder.decode is unreliable.
 			 * See: uwc-4, and
 			 * See: com.atlassian.uwc.converters.mediawiki.DecodeEntities
 			 */
 			return URIUtil.decode(input, encoding);
 		} catch (URIException e) {
 			log.error("Problem with URL decoding:\n" +
 					"input = " +input +"\n" +
 					"encoding = " + encoding);
 			e.printStackTrace();
 		}
 		return input;
 	}

 	/**
 	 * @return set of illegal pagenames that have been found in the
 	 * process of running this converter.
 	 */
 	public HashSet<String> getIllegalPagenames() {
 		return this.illegalPagenames;
 	}

 	/**
 	 * @param illegalNames assign set of illegal pagenames
 	 * that have been found in a previous conversion.
 	 * Useful for finding links to illegal pagenames.
 	 */
 	public void setIllegalPagenames(HashSet<String> illegalNames) {
 		this.illegalPagenames = illegalNames;
 	}

 	/**
 	 * adds the given input to the illegal pagenames object
 	 * for use in the future.
 	 * Useful for finding links to illegal pagenames.
 	 * @param input an illegal pagename
 	 */
 	public void addIllegalPagename(String input) {
 		if (this.illegalPagenames == null)
 			this.illegalPagenames = new HashSet<String>();
 		this.illegalPagenames.add(input);
 	}


 	private String illegalCharClass =
 		"[" +			//start char class
 			":;<>@/|#" +//chars
 			"\\[" +		//left bracket, still part of char class
 			"\\]" +		//actual right bracket, still part of char class
 			"{}^" +		//some more chars
 		"]" +			//end char class
 		"|" +			//or
 		"\\\\";			//a backslash (too many issues with fitting it into the char class)
 	private String illegalStartClass =
 		"[" +			//start char class
 			"$~" +		//dollar or tilde
 		"]" +			//end char class
 		"|" +			//or
 		"\\.\\.";		//two dots
 	Pattern illegalPattern = Pattern.compile(illegalCharClass);
 	Pattern illegalStartPattern = Pattern.compile(illegalStartClass);
 	Pattern entityPattern = Pattern.compile("%..");
 	/**
 	 * @param input
 	 * @return true if the given input contains illegal characters
 	 * or URL entities (which could contain illegal characters)
 	 */
 	protected boolean illegal(String input) {
 		Matcher illegalStarter = illegalStartPattern.matcher(input);
 		Matcher illegalFinder = illegalPattern.matcher(input);
 		Matcher entityFinder = entityPattern.matcher(input);
 		return illegalStarter.lookingAt() || illegalFinder.find() || entityFinder.find();
 	}

 	public void setAllowAt(boolean allow) {
 		this.allowAt = allow;
 	}

 	public void setAllowTilde(boolean allow) {
 		this.allowTilde = allow;
 	}
 }
	package com.atlassian.uwc.converters;

	import java.io.IOException;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Properties;
	import java.util.TreeMap;
	import java.util.Vector;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;
	import java.util.regex.PatternSyntaxException;

	import org.apache.commons.httpclient.URIException;
	import org.apache.commons.httpclient.util.URIUtil;

	import com.atlassian.uwc.converters.IllegalChar.Type;
	import com.atlassian.uwc.util.PropertyFileManager;


	/**
	* @author Laura Kolker
	* parent class for IllegalLinkNameConverter and IllegalPageNameConverter.
	* It can be used to find illegal page names, and help translate them
	* to legal equivalents.
	*/
	public abstract class IllegalNameConverter extends BaseConverter {

	private static final String URLDECODE_KEY = "illegalnames-urldecode";
	private static final String DEFAULT_REPLACEMENT = "_";
	private static final String ILLEGALCHAR_PROP_SUFFIX = ".replacement";
	private static final String ILLEGALCHAR_PROP_PREFIX = "illegalchar.";
	private static final String ILLEGALSTART_PROP_PREFIX = "illegalstart.";
	private static final String CONFIG_SETTINGS_FILE = "settings.illegalcharmap.properties";
	private static final String PROP_DIR = "conf";
	private static final String FILE_SEP = System.getProperty("file.separator");

	private String[] requiredIllegalChars = {
	"colon",
	"semicolon",
	"lessthan",
	"greaterthan",
	"at",
	"forwardslash",
	"backslash",
	"pipe",
	"hash",
	"leftbracket",
	"rightbracket",
	"leftcurlybrace",
	"rightcurlybrace",
	"carat",
	};
	private String[] requiredIllegalStartChars = {
	"dollar",
	"twodots", //..
	"tilde",
	};

	private HashMap<String,String> illegalKeyValues = null;

	private HashSet<String> illegalPagenames = null;

	/**
	* if true, the @ character will be allowed to remain.
	* This is useful for links that might have the same syntax
	* as Confluence's shortcut links.
	* Currently the only way to set this is to put the following property
	* in the converter properties file:
	* wiki.xxxx.allow-at-in-links.property=true
	* That property will only affect links. It will not affect pagenames
	*/
	private boolean allowAt = false;
	/**
	* if true, the ~ character will be allowed to remain.
	* This is useful for converting to user profile links.
	* Currently the only way to set this is to put the following property
	* in the converter properties file:
	* wiki.xxxx.allow-tilde-in-links.property=true
	* That property will only affect links. It will not affect pagenames
	*/
	private boolean allowTilde = false;

	/* public methods */
	public IllegalNameConverter() {
	buildIllegalKeyValueMap();
	}

	/**
	* looks to see if the given input is illegal
	* and returns that input with legal equivalents
	* @param input the input to examine for illegal characters
	* @return input with legal equivalent
	*/
	public String convertIllegalName(String input) {
	//Create map of replacements
	List<IllegalChar> illegalChars = getIllegalCharObjects();
	//Look for illegal characters
	String legal = searchAndReplaceIllegalChars(input, illegalChars);
	//replace illegal characters using replacement map
	return legal;
	}

	/* private or protected - Helper methods */

	/**
	* creates the key-value map of illegal characters
	*/
	private void buildIllegalKeyValueMap() {
	illegalKeyValues = new HashMap<String, String>();
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "colon" + ILLEGALCHAR_PROP_SUFFIX, ":");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "semicolon" + ILLEGALCHAR_PROP_SUFFIX, ";");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "lessthan" + ILLEGALCHAR_PROP_SUFFIX, "<");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "greaterthan" + ILLEGALCHAR_PROP_SUFFIX, ">");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "at" + ILLEGALCHAR_PROP_SUFFIX, "@");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "forwardslash" + ILLEGALCHAR_PROP_SUFFIX, "/");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "backslash" + ILLEGALCHAR_PROP_SUFFIX, "\\");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "pipe" + ILLEGALCHAR_PROP_SUFFIX, "\|");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "hash" + ILLEGALCHAR_PROP_SUFFIX, "#");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "leftbracket" + ILLEGALCHAR_PROP_SUFFIX, "[");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "rightbracket" + ILLEGALCHAR_PROP_SUFFIX, "]");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "leftcurlybrace" + ILLEGALCHAR_PROP_SUFFIX, "{");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "rightcurlybrace" + ILLEGALCHAR_PROP_SUFFIX, "}");
	illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "carat" + ILLEGALCHAR_PROP_SUFFIX, "^");
	illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "dollar" + ILLEGALCHAR_PROP_SUFFIX, "$");
	illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "twodots" + ILLEGALCHAR_PROP_SUFFIX, "..");
	illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "tilde" + ILLEGALCHAR_PROP_SUFFIX, "~");

	}

	/**
	* @return List of IllegalChar objects representing all of the
	* illegal pagename characters that Confluence forbids.
	*/
	protected List<IllegalChar> getIllegalCharObjects() {
	List<IllegalChar> illegalChars = new Vector<IllegalChar>();

	TreeMap<String, String> properties = getIllegalCharProperties();

	illegalChars.addAll(getIllegalAnywhere(properties, requiredIllegalChars));
	illegalChars.addAll(getIllegalStarting(properties, requiredIllegalStartChars));

	return illegalChars;
	}

	/**
	* @return a map of the illegalchar properties from the
	* settings.illegalcharmap.properties file
	*/
	protected TreeMap<String, String> getIllegalCharProperties() {
	String propLocation = PROP_DIR + FILE_SEP + CONFIG_SETTINGS_FILE;
	TreeMap<String, String> properties = null;
	try {
	properties = PropertyFileManager.loadPropertiesFile(propLocation);
	} catch (IOException e) {
	log.error("Problem loading properties file: " + propLocation);
	e.printStackTrace();
	}
	return properties;
	}

	/**
	* @param properties
	* @param required
	* @return
	*/
	protected Vector<IllegalChar> getIllegalAnywhere(
	TreeMap<String, String> properties, String[] required) {
	Vector<IllegalChar> illegals = new Vector<IllegalChar>();

	for (int i = 0; i < required.length; i++) {
	String baseKey = required[i];
	if (this.allowAt && "at".equals(baseKey)) continue;
	String key = ILLEGALCHAR_PROP_PREFIX +
	baseKey +
	ILLEGALCHAR_PROP_SUFFIX;
	IllegalChar illegal =
	createIllegalChar(properties, key, IllegalChar.Type.ANYWHERE);
	illegals.add(illegal);
	}

	return illegals;
	}

	/**
	* creates an IllegalChar object for the given key and type,
	* using the give properties
	* @param properties customized properties from the settings.illegalcharmap.properties
	* @param key a key that is referenced in the given properties
	* @param type expected IllegalChar type (ANYWHERE, or START_ONLY)
	* @return an IllegalChar object
	*/
	protected IllegalChar createIllegalChar(
	TreeMap<String, String> properties, String key, IllegalChar.Type type) {
	String replacement = null;

	if (properties.containsKey(key)) {
	replacement = properties.get(key);
	replacement = (isLegalReplacement(replacement, type))?
	replacement:
	getDefaultReplacement();
	}
	else {
	replacement = getDefaultReplacement();
	}

	String value = getIllegalCharValue(key);
	IllegalChar illegal =
	new IllegalChar(value, replacement, type);

	return illegal;
	}

	private String illegalChars = " : ; < > @ / \\ \| # [ ] { } ^ ";
	private String illegalStart = " $ .. ~ ";
	/**
	* determins if a candidate replacement for the given IllegalChar.Type
	* is a legal replacement
	* @param replacement candidate replacement
	* @param type ANYWHERE or START_ONLY
	* @return true if the replacement is legal
	*/
	protected boolean isLegalReplacement(String replacement, Type type) {
	Pattern oneCharPattern = getReplacementPattern(replacement);
	String input = (type == IllegalChar.Type.START_ONLY)?illegalStart:illegalChars;
	Matcher illegalFinder = oneCharPattern.matcher(input);
	boolean notFound = ! (illegalFinder.find());
	return notFound;
	}

	/**
	* creates a regex Pattern for finding the given replacement in a string
	* @param a char or series of chars that could be used as a replacement
	* for some IllegalChar
	* @return Pattern with the given replacement character seperated by spaces.
	* It will be useful for comparing a candidate replacment against
	* strings illegalChars and illegalStart
	*/
	private Pattern getReplacementPattern(String replacement) {
	String delim = "\\";
	String oneChar =
	" " + //a space
	delim + //escape the next char
	replacement +
	" "; //a space
	Pattern oneCharPattern = null;
	try {
	oneCharPattern = Pattern.compile(oneChar);
	} catch (PatternSyntaxException e ) { //no backslash necessary
	oneChar = " " + replacement + " "; //try without delim
	oneCharPattern = Pattern.compile(oneChar);
	}
	return oneCharPattern;
	}

	/**
	* @return the default replacement char. (Used if
	* a customized replacement char is illegal itsel.f)
	*/
	protected String getDefaultReplacement() {
	return DEFAULT_REPLACEMENT;
	}

	/**
	* @param key gets the current character value for
	* a given illegalchar key.
	* Example: input = illegalchar.colon.replacement
	* return value = ":", or return the key, if no such value exists
	* @return value for a given IllegalChar, given a key
	*/
	protected String getIllegalCharValue(String key) {
	String val = this.illegalKeyValues.get(key);
	val = (val == null)?key:val;
	val = ("".equals(val))?key:val;
	return val;
	}

	/**
	* creates a Vector of IllegalChar objects which describe
	* those of the START_ONLY type.
	* @param properties key-value paired properteis from settings.illegalcharmap.properties
	* @param required array of base keys ("colon", "greaterthan", etc.)
	* @return Vector of IllegalChar objects
	*/
	protected Vector<IllegalChar> getIllegalStarting(
	TreeMap<String, String> properties, String[] required) {
	Vector<IllegalChar> illegals = new Vector<IllegalChar>();

	for (int i = 0; i < required.length; i++) {
	String baseKey = required[i];
	if (this.allowTilde && "tilde".equals(baseKey)) continue;
	String key = ILLEGALSTART_PROP_PREFIX +
	baseKey +
	ILLEGALCHAR_PROP_SUFFIX;
	IllegalChar illegal =
	createIllegalChar(properties, key, IllegalChar.Type.START_ONLY);
	illegals.add(illegal);
	}

	return illegals;
	}

	/**
	* Examines the given input, decodes and URL entities, and replaces each instance of
	* and illegal character with its designated replacement
	* @param input the given input to be searched
	* @param illegalChars a list of IllegalChar objects representing the chars that must
	* be replaced
	* @return the input with all of the proper replacements
	*/
	protected String searchAndReplaceIllegalChars(String input, List<IllegalChar> illegalChars) {
	String legal = input;
	if (shouldUrlDecode()) {
	log.debug("undecoded filename = " + legal); //DELETE
	legal = decodeUrl(legal);
	log.debug("decoded filename = " + legal);
	}
	for (IllegalChar illegal : illegalChars) {
	legal = illegal.getReplacement(legal);
	}
	return legal;
	}

	private boolean shouldUrlDecode() {
	Properties props = getProperties();
	String setting = (String) props.get(URLDECODE_KEY);
	if (setting == null) return false;
	if (setting.equals("true")) return true;
	return false;
	}

	/**
	* decodes any URL entities in the given input
	* @param input
	* @return input with any URL entities decoded.
	*/
	public String decodeUrl(String input) {
	String encoding = "utf-8";
	if (input == null) {
	log.info("decodeUrl: input is null.");
	return input;
	}
	try {
	/* we have to use the apache decoder,
	* as URLDecoder.decode is unreliable.
	* See: uwc-4, and
	* See: com.atlassian.uwc.converters.mediawiki.DecodeEntities
	*/
	return URIUtil.decode(input, encoding);
	} catch (URIException e) {
	log.error("Problem with URL decoding:\n" +
	"input = " +input +"\n" +
	"encoding = " + encoding);
	e.printStackTrace();
	}
	return input;
	}

	/**
	* @return set of illegal pagenames that have been found in the
	* process of running this converter.
	*/
	public HashSet<String> getIllegalPagenames() {
	return this.illegalPagenames;
	}

	/**
	* @param illegalNames assign set of illegal pagenames
	* that have been found in a previous conversion.
	* Useful for finding links to illegal pagenames.
	*/
	public void setIllegalPagenames(HashSet<String> illegalNames) {
	this.illegalPagenames = illegalNames;
	}

	/**
	* adds the given input to the illegal pagenames object
	* for use in the future.
	* Useful for finding links to illegal pagenames.
	* @param input an illegal pagename
	*/
	public void addIllegalPagename(String input) {
	if (this.illegalPagenames == null)
	this.illegalPagenames = new HashSet<String>();
	this.illegalPagenames.add(input);
	}


	private String illegalCharClass =
	"[" + //start char class
	":;<>@/\|#" +//chars
	"\\[" + //left bracket, still part of char class
	"\\]" + //actual right bracket, still part of char class
	"{}^" + //some more chars
	"]" + //end char class
	"\|" + //or
	"\\\\"; //a backslash (too many issues with fitting it into the char class)
	private String illegalStartClass =
	"[" + //start char class
	"$~" + //dollar or tilde
	"]" + //end char class
	"\|" + //or
	"\\.\\."; //two dots
	Pattern illegalPattern = Pattern.compile(illegalCharClass);
	Pattern illegalStartPattern = Pattern.compile(illegalStartClass);
	Pattern entityPattern = Pattern.compile("%..");
	/**
	* @param input
	* @return true if the given input contains illegal characters
	* or URL entities (which could contain illegal characters)
	*/
	protected boolean illegal(String input) {
	Matcher illegalStarter = illegalStartPattern.matcher(input);
	Matcher illegalFinder = illegalPattern.matcher(input);
	Matcher entityFinder = entityPattern.matcher(input);
	return illegalStarter.lookingAt() \|\| illegalFinder.find() \|\| entityFinder.find();
	}

	public void setAllowAt(boolean allow) {
	this.allowAt = allow;
	}

	public void setAllowTilde(boolean allow) {
	this.allowTilde = allow;
	}
	}