blob: 15be20716539f717cf18a7630baaf9faa33d62d6 [file] [log] [blame]
package org.apache.ofbiz.base.html;
import java.util.regex.Pattern;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.PolicyFactory;
import com.google.common.base.Predicate;
/**
* Based on the <a href=
* "http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#Stage_2_-_Choosing_a_base_policy_file">AntiSamy
* EBay example</a>. eBay (http://www.ebay.com/) is the most
* popular online auction site in the universe, as far as I can tell. It is a
* public site so anyone is allowed to post listings with rich HTML content.
* It's not surprising that given the attractiveness of eBay as a target that it
* has been subject to a few complex XSS attacks. Listings are allowed to
* contain much more rich content than, say, Slashdot- so it's attack surface is
* considerably larger. The following tags appear to be accepted by eBay (they
* don't publish rules): {@code <a>},...
*/
public class CustomPermissivePolicy implements SanitizerCustomPolicy {
// Some common regular expression definitions.
// The 16 colors defined by the HTML Spec (also used by the CSS Spec)
private static final Pattern COLOR_NAME = Pattern.compile(
"(?:aqua|black|blue|fuchsia|gray|grey|green|lime|maroon|navy|olive|purple"
+ "|red|silver|teal|white|yellow)");
// HTML/CSS Spec allows 3 or 6 digit hex to specify color
private static final Pattern COLOR_CODE = Pattern.compile(
"(?:#(?:[0-9a-fA-F]{3}(?:[0-9a-fA-F]{3})?))");
private static final Pattern NUMBER_OR_PERCENT = Pattern.compile(
"[0-9]+%?");
private static final Pattern PARAGRAPH = Pattern.compile(
"(?:[\\p{L}\\p{N},'\\.\\s\\-_\\(\\)]|&[0-9]{2};)*");
private static final Pattern HTML_ID = Pattern.compile(
"[a-zA-Z0-9\\:\\-_\\.]+");
// force non-empty with a '+' at the end instead of '*'
private static final Pattern HTML_TITLE = Pattern.compile(
"[\\p{L}\\p{N}\\s\\-_',:\\[\\]!\\./\\\\\\(\\)&]*");
private static final Pattern HTML_CLASS = Pattern.compile(
"[a-zA-Z0-9\\s,\\-_]+");
private static final Pattern ONSITE_URL = Pattern.compile(
"(?:[\\p{L}\\p{N}\\\\\\.\\#@\\$%\\+&;\\-_~,\\?=/!]+|\\#(\\w)+)");
private static final Pattern OFFSITE_URL = Pattern.compile(
"\\s*(?:(?:ht|f)tps?://|mailto:)[\\p{L}\\p{N}]"
+ "[\\p{L}\\p{N}\\p{Zs}\\.\\#@\\$%\\+&;:\\-_~,\\?=/!\\(\\)]*+\\s*");
private static final Pattern NUMBER = Pattern.compile(
"[+-]?(?:(?:[0-9]+(?:\\.[0-9]*)?)|\\.[0-9]+)");
private static final Pattern NAME = Pattern.compile("[a-zA-Z0-9\\-_\\$]+");
private static final Pattern ALIGN = Pattern.compile(
"(?i)center|left|right|justify|char");
private static final Pattern VALIGN = Pattern.compile(
"(?i)baseline|bottom|middle|top");
private static final Predicate<String> COLOR_NAME_OR_COLOR_CODE = matchesEither(COLOR_NAME, COLOR_CODE);
private static final Predicate<String> ONSITE_OR_OFFSITE_URL = matchesEither(ONSITE_URL, OFFSITE_URL);
private static final Pattern HISTORY_BACK = Pattern.compile(
"(?:javascript:)?\\Qhistory.go(-1)\\E");
private static final Pattern ONE_CHAR = Pattern.compile(
".?", Pattern.DOTALL);
/**
* A policy that can be used to produce policies that sanitize to HTML sinks via
* {@link PolicyFactory#apply}.
*/
public static final PolicyFactory POLICY_DEFINITION = new HtmlPolicyBuilder()
.allowAttributes("id").matching(HTML_ID).globally()
.allowAttributes("class").matching(HTML_CLASS).globally()
.allowAttributes("lang").matching(Pattern.compile("[a-zA-Z]{2,20}"))
.globally()
.allowAttributes("title").matching(HTML_TITLE).globally()
.allowStyling()
.allowAttributes("align").matching(ALIGN).onElements("p")
.allowAttributes("for").matching(HTML_ID).onElements("label")
.allowAttributes("color").matching(COLOR_NAME_OR_COLOR_CODE)
.onElements("font")
.allowAttributes("face")
.matching(Pattern.compile("[\\w;, \\-]+"))
.onElements("font")
.allowAttributes("size").matching(NUMBER).onElements("font")
.allowAttributes("href").matching(ONSITE_OR_OFFSITE_URL)
.onElements("a")
.allowStandardUrlProtocols()
.allowAttributes("nohref").onElements("a")
.allowAttributes("target").matching(NAME).onElements("a")
.allowAttributes("name").matching(NAME).onElements("a")
.allowAttributes(
"onfocus", "onblur", "onclick", "onmousedown", "onmouseup")
.matching(HISTORY_BACK).onElements("a")
.requireRelNofollowOnLinks()
.allowAttributes("src").matching(ONSITE_OR_OFFSITE_URL)
.onElements("img")
.allowAttributes("name").matching(NAME)
.onElements("img")
.allowAttributes("alt").matching(PARAGRAPH)
.onElements("img")
.allowAttributes("border", "hspace", "vspace").matching(NUMBER)
.onElements("img")
.allowAttributes("border", "cellpadding", "cellspacing")
.matching(NUMBER).onElements("table")
.allowAttributes("bgcolor").matching(COLOR_NAME_OR_COLOR_CODE)
.onElements("table")
.allowAttributes("background").matching(ONSITE_URL)
.onElements("table")
.allowAttributes("background").matching(ONSITE_URL)
.onElements("td", "th", "tr")
.allowAttributes("align").matching(ALIGN)
.onElements("table")
.allowAttributes("noresize").matching(Pattern.compile("(?i)noresize"))
.onElements("table")
.allowAttributes("bgcolor").matching(COLOR_NAME_OR_COLOR_CODE)
.onElements("td", "th")
.allowAttributes("abbr").matching(PARAGRAPH)
.onElements("td", "th")
.allowAttributes("axis", "headers").matching(NAME)
.onElements("td", "th")
.allowAttributes("scope")
.matching(Pattern.compile("(?i)(?:row|col)(?:group)?"))
.onElements("td", "th")
.allowAttributes("nowrap")
.onElements("td", "th")
.allowAttributes("height", "width").matching(NUMBER_OR_PERCENT)
.onElements("table", "td", "th", "tr", "img")
.allowAttributes("align").matching(ALIGN)
.onElements("thead", "tbody", "tfoot", "img",
"td", "th", "tr", "colgroup", "col")
.allowAttributes("valign").matching(VALIGN)
.onElements("thead", "tbody", "tfoot",
"td", "th", "tr", "colgroup", "col")
.allowAttributes("charoff").matching(NUMBER_OR_PERCENT)
.onElements("td", "th", "tr", "colgroup", "col",
"thead", "tbody", "tfoot")
.allowAttributes("char").matching(ONE_CHAR)
.onElements("td", "th", "tr", "colgroup", "col",
"thead", "tbody", "tfoot")
.allowAttributes("colspan", "rowspan").matching(NUMBER)
.onElements("td", "th")
.allowAttributes("span", "width").matching(NUMBER_OR_PERCENT)
.onElements("colgroup", "col")
.allowElements(
"a", "label", "noscript", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
"p", "i", "b", "u", "strong", "em", "small", "big", "pre", "code",
"cite", "samp", "sub", "sup", "strike", "center", "blockquote",
"hr", "br", "col", "font", "map", "span", "div", "img",
"ul", "ol", "li", "dd", "dt", "dl", "tbody", "thead", "tfoot",
"table", "td", "th", "tr", "colgroup", "fieldset", "legend", "header",
"picture", "source", "section", "nav", "footer")
.toFactory();
/**
* Constructs a predicate checking if a string matches any of the two provided patterns.
*
* @param a the first pattern
* @param b the second pattern
* @return a predicate checking if a string matches either {@code a} or {@code b}
*/
private static Predicate<String> matchesEither(Pattern a, Pattern b) {
return str -> a.matcher(str).matches() || b.matcher(str).matches();
}
@Override
public PolicyFactory getSanitizerPolicy() {
return POLICY_DEFINITION;
}
}