// blob: 7a232292c3b29414103985f2747d0ee4b5330a55 [file] [log] [blame]
package com.atlassian.uwc.converters;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.TreeMap;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.URIUtil;
import com.atlassian.uwc.converters.IllegalChar.Type;
import com.atlassian.uwc.util.PropertyFileManager;
/**
* @author Laura Kolker
* parent class for IllegalLinkNameConverter and IllegalPageNameConverter.
* It can be used to find illegal page names, and help translate them
* to legal equivalents.
*/
public abstract class IllegalNameConverter extends BaseConverter {
private static final String URLDECODE_KEY = "illegalnames-urldecode";
private static final String DEFAULT_REPLACEMENT = "_";
private static final String ILLEGALCHAR_PROP_SUFFIX = ".replacement";
private static final String ILLEGALCHAR_PROP_PREFIX = "illegalchar.";
private static final String ILLEGALSTART_PROP_PREFIX = "illegalstart.";
private static final String CONFIG_SETTINGS_FILE = "settings.illegalcharmap.properties";
private static final String PROP_DIR = "conf";
private static final String FILE_SEP = System.getProperty("file.separator");
private String[] requiredIllegalChars = {
"colon",
"semicolon",
"lessthan",
"greaterthan",
"at",
"forwardslash",
"backslash",
"pipe",
"hash",
"leftbracket",
"rightbracket",
"leftcurlybrace",
"rightcurlybrace",
"carat",
};
private String[] requiredIllegalStartChars = {
"dollar",
"twodots", //..
"tilde",
};
private HashMap<String,String> illegalKeyValues = null;
private HashSet<String> illegalPagenames = null;
/**
* if true, the @ character will be allowed to remain.
* This is useful for links that might have the same syntax
* as Confluence's shortcut links.
* Currently the only way to set this is to put the following property
* in the converter properties file:
* wiki.xxxx.allow-at-in-links.property=true
* That property will only affect links. It will not affect pagenames
*/
private boolean allowAt = false;
/**
* if true, the ~ character will be allowed to remain.
* This is useful for converting to user profile links.
* Currently the only way to set this is to put the following property
* in the converter properties file:
* wiki.xxxx.allow-tilde-in-links.property=true
* That property will only affect links. It will not affect pagenames
*/
private boolean allowTilde = false;
/* public methods */
public IllegalNameConverter() {
buildIllegalKeyValueMap();
}
/**
* looks to see if the given input is illegal
* and returns that input with legal equivalents
* @param input the input to examine for illegal characters
* @return input with legal equivalent
*/
public String convertIllegalName(String input) {
//Create map of replacements
List<IllegalChar> illegalChars = getIllegalCharObjects();
//Look for illegal characters
String legal = searchAndReplaceIllegalChars(input, illegalChars);
//replace illegal characters using replacement map
return legal;
}
/* private or protected - Helper methods */
/**
* creates the key-value map of illegal characters
*/
private void buildIllegalKeyValueMap() {
illegalKeyValues = new HashMap<String, String>();
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "colon" + ILLEGALCHAR_PROP_SUFFIX, ":");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "semicolon" + ILLEGALCHAR_PROP_SUFFIX, ";");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "lessthan" + ILLEGALCHAR_PROP_SUFFIX, "<");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "greaterthan" + ILLEGALCHAR_PROP_SUFFIX, ">");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "at" + ILLEGALCHAR_PROP_SUFFIX, "@");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "forwardslash" + ILLEGALCHAR_PROP_SUFFIX, "/");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "backslash" + ILLEGALCHAR_PROP_SUFFIX, "\\");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "pipe" + ILLEGALCHAR_PROP_SUFFIX, "|");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "hash" + ILLEGALCHAR_PROP_SUFFIX, "#");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "leftbracket" + ILLEGALCHAR_PROP_SUFFIX, "[");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "rightbracket" + ILLEGALCHAR_PROP_SUFFIX, "]");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "leftcurlybrace" + ILLEGALCHAR_PROP_SUFFIX, "{");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "rightcurlybrace" + ILLEGALCHAR_PROP_SUFFIX, "}");
illegalKeyValues.put(ILLEGALCHAR_PROP_PREFIX + "carat" + ILLEGALCHAR_PROP_SUFFIX, "^");
illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "dollar" + ILLEGALCHAR_PROP_SUFFIX, "$");
illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "twodots" + ILLEGALCHAR_PROP_SUFFIX, "..");
illegalKeyValues.put(ILLEGALSTART_PROP_PREFIX + "tilde" + ILLEGALCHAR_PROP_SUFFIX, "~");
}
/**
* @return List of IllegalChar objects representing all of the
* illegal pagename characters that Confluence forbids.
*/
protected List<IllegalChar> getIllegalCharObjects() {
List<IllegalChar> illegalChars = new Vector<IllegalChar>();
TreeMap<String, String> properties = getIllegalCharProperties();
illegalChars.addAll(getIllegalAnywhere(properties, requiredIllegalChars));
illegalChars.addAll(getIllegalStarting(properties, requiredIllegalStartChars));
return illegalChars;
}
/**
* @return a map of the illegalchar properties from the
* settings.illegalcharmap.properties file
*/
protected TreeMap<String, String> getIllegalCharProperties() {
String propLocation = PROP_DIR + FILE_SEP + CONFIG_SETTINGS_FILE;
TreeMap<String, String> properties = null;
try {
properties = PropertyFileManager.loadPropertiesFile(propLocation);
} catch (IOException e) {
log.error("Problem loading properties file: " + propLocation);
e.printStackTrace();
}
return properties;
}
/**
* @param properties
* @param required
* @return
*/
protected Vector<IllegalChar> getIllegalAnywhere(
TreeMap<String, String> properties, String[] required) {
Vector<IllegalChar> illegals = new Vector<IllegalChar>();
for (int i = 0; i < required.length; i++) {
String baseKey = required[i];
if (this.allowAt && "at".equals(baseKey)) continue;
String key = ILLEGALCHAR_PROP_PREFIX +
baseKey +
ILLEGALCHAR_PROP_SUFFIX;
IllegalChar illegal =
createIllegalChar(properties, key, IllegalChar.Type.ANYWHERE);
illegals.add(illegal);
}
return illegals;
}
/**
* creates an IllegalChar object for the given key and type,
* using the give properties
* @param properties customized properties from the settings.illegalcharmap.properties
* @param key a key that is referenced in the given properties
* @param type expected IllegalChar type (ANYWHERE, or START_ONLY)
* @return an IllegalChar object
*/
protected IllegalChar createIllegalChar(
TreeMap<String, String> properties, String key, IllegalChar.Type type) {
String replacement = null;
if (properties.containsKey(key)) {
replacement = properties.get(key);
replacement = (isLegalReplacement(replacement, type))?
replacement:
getDefaultReplacement();
}
else {
replacement = getDefaultReplacement();
}
String value = getIllegalCharValue(key);
IllegalChar illegal =
new IllegalChar(value, replacement, type);
return illegal;
}
private String illegalChars = " : ; < > @ / \\ | # [ ] { } ^ ";
private String illegalStart = " $ .. ~ ";
/**
* determins if a candidate replacement for the given IllegalChar.Type
* is a legal replacement
* @param replacement candidate replacement
* @param type ANYWHERE or START_ONLY
* @return true if the replacement is legal
*/
protected boolean isLegalReplacement(String replacement, Type type) {
Pattern oneCharPattern = getReplacementPattern(replacement);
String input = (type == IllegalChar.Type.START_ONLY)?illegalStart:illegalChars;
Matcher illegalFinder = oneCharPattern.matcher(input);
boolean notFound = ! (illegalFinder.find());
return notFound;
}
/**
* creates a regex Pattern for finding the given replacement in a string
* @param a char or series of chars that could be used as a replacement
* for some IllegalChar
* @return Pattern with the given replacement character seperated by spaces.
* It will be useful for comparing a candidate replacment against
* strings illegalChars and illegalStart
*/
private Pattern getReplacementPattern(String replacement) {
String delim = "\\";
String oneChar =
" " + //a space
delim + //escape the next char
replacement +
" "; //a space
Pattern oneCharPattern = null;
try {
oneCharPattern = Pattern.compile(oneChar);
} catch (PatternSyntaxException e ) { //no backslash necessary
oneChar = " " + replacement + " "; //try without delim
oneCharPattern = Pattern.compile(oneChar);
}
return oneCharPattern;
}
/**
* @return the default replacement char. (Used if
* a customized replacement char is illegal itsel.f)
*/
protected String getDefaultReplacement() {
return DEFAULT_REPLACEMENT;
}
/**
* @param key gets the current character value for
* a given illegalchar key.
* Example: input = illegalchar.colon.replacement
* return value = ":", or return the key, if no such value exists
* @return value for a given IllegalChar, given a key
*/
protected String getIllegalCharValue(String key) {
String val = this.illegalKeyValues.get(key);
val = (val == null)?key:val;
val = ("".equals(val))?key:val;
return val;
}
/**
* creates a Vector of IllegalChar objects which describe
* those of the START_ONLY type.
* @param properties key-value paired properteis from settings.illegalcharmap.properties
* @param required array of base keys ("colon", "greaterthan", etc.)
* @return Vector of IllegalChar objects
*/
protected Vector<IllegalChar> getIllegalStarting(
TreeMap<String, String> properties, String[] required) {
Vector<IllegalChar> illegals = new Vector<IllegalChar>();
for (int i = 0; i < required.length; i++) {
String baseKey = required[i];
if (this.allowTilde && "tilde".equals(baseKey)) continue;
String key = ILLEGALSTART_PROP_PREFIX +
baseKey +
ILLEGALCHAR_PROP_SUFFIX;
IllegalChar illegal =
createIllegalChar(properties, key, IllegalChar.Type.START_ONLY);
illegals.add(illegal);
}
return illegals;
}
/**
* Examines the given input, decodes and URL entities, and replaces each instance of
* and illegal character with its designated replacement
* @param input the given input to be searched
* @param illegalChars a list of IllegalChar objects representing the chars that must
* be replaced
* @return the input with all of the proper replacements
*/
protected String searchAndReplaceIllegalChars(String input, List<IllegalChar> illegalChars) {
String legal = input;
if (shouldUrlDecode()) {
log.debug("undecoded filename = " + legal); //DELETE
legal = decodeUrl(legal);
log.debug("decoded filename = " + legal);
}
for (IllegalChar illegal : illegalChars) {
legal = illegal.getReplacement(legal);
}
return legal;
}
private boolean shouldUrlDecode() {
Properties props = getProperties();
String setting = (String) props.get(URLDECODE_KEY);
if (setting == null) return false;
if (setting.equals("true")) return true;
return false;
}
/**
* decodes any URL entities in the given input
* @param input
* @return input with any URL entities decoded.
*/
public String decodeUrl(String input) {
String encoding = "utf-8";
if (input == null) {
log.info("decodeUrl: input is null.");
return input;
}
try {
/* we have to use the apache decoder,
* as URLDecoder.decode is unreliable.
* See: uwc-4, and
* See: com.atlassian.uwc.converters.mediawiki.DecodeEntities
*/
return URIUtil.decode(input, encoding);
} catch (URIException e) {
log.error("Problem with URL decoding:\n" +
"input = " +input +"\n" +
"encoding = " + encoding);
e.printStackTrace();
}
return input;
}
/**
* @return set of illegal pagenames that have been found in the
* process of running this converter.
*/
public HashSet<String> getIllegalPagenames() {
return this.illegalPagenames;
}
/**
* @param illegalNames assign set of illegal pagenames
* that have been found in a previous conversion.
* Useful for finding links to illegal pagenames.
*/
public void setIllegalPagenames(HashSet<String> illegalNames) {
this.illegalPagenames = illegalNames;
}
/**
* adds the given input to the illegal pagenames object
* for use in the future.
* Useful for finding links to illegal pagenames.
* @param input an illegal pagename
*/
public void addIllegalPagename(String input) {
if (this.illegalPagenames == null)
this.illegalPagenames = new HashSet<String>();
this.illegalPagenames.add(input);
}
private String illegalCharClass =
"[" + //start char class
":;<>@/|#" +//chars
"\\[" + //left bracket, still part of char class
"\\]" + //actual right bracket, still part of char class
"{}^" + //some more chars
"]" + //end char class
"|" + //or
"\\\\"; //a backslash (too many issues with fitting it into the char class)
private String illegalStartClass =
"[" + //start char class
"$~" + //dollar or tilde
"]" + //end char class
"|" + //or
"\\.\\."; //two dots
Pattern illegalPattern = Pattern.compile(illegalCharClass);
Pattern illegalStartPattern = Pattern.compile(illegalStartClass);
Pattern entityPattern = Pattern.compile("%..");
/**
* @param input
* @return true if the given input contains illegal characters
* or URL entities (which could contain illegal characters)
*/
protected boolean illegal(String input) {
Matcher illegalStarter = illegalStartPattern.matcher(input);
Matcher illegalFinder = illegalPattern.matcher(input);
Matcher entityFinder = entityPattern.matcher(input);
return illegalStarter.lookingAt() || illegalFinder.find() || entityFinder.find();
}
public void setAllowAt(boolean allow) {
this.allowAt = allow;
}
public void setAllowTilde(boolean allow) {
this.allowTilde = allow;
}
}