SLING-7323 - Optimise URL handling

* added better decoding for URLs
diff --git a/pom.xml b/pom.xml
index f3cd227..e8dd6e4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -272,6 +272,12 @@
             <scope>provided</scope>
         </dependency>
         <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.6</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
         </dependency>
@@ -297,6 +303,12 @@
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-simple</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.sling</groupId>
+            <artifactId>org.apache.sling.testing.sling-mock</artifactId>
+            <version>2.2.14</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
 </project>
diff --git a/src/main/java/org/apache/sling/xss/impl/XSSAPIImpl.java b/src/main/java/org/apache/sling/xss/impl/XSSAPIImpl.java
index da12d5e..4c62098 100644
--- a/src/main/java/org/apache/sling/xss/impl/XSSAPIImpl.java
+++ b/src/main/java/org/apache/sling/xss/impl/XSSAPIImpl.java
@@ -18,6 +18,9 @@
 
 import java.io.StringReader;
 import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Matcher;
@@ -29,6 +32,8 @@
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.sling.xss.ProtectionContext;
 import org.apache.sling.xss.XSSAPI;
 import org.apache.sling.xss.XSSFilter;
@@ -222,27 +227,36 @@
     @Override
     @Nonnull
     public String getValidHref(final String url) {
-        if (url != null && url.length() > 0) {
-            // Percent-encode characters that are not allowed in unquoted
-            // HTML attributes: ", ', >, <, ` and space. We don't encode =
-            // since this would break links with query parameters.
-            String encodedUrl = url.replaceAll("\"", "%22")
-                    .replaceAll("'", "%27")
-                    .replaceAll(">", "%3E")
-                    .replaceAll("<", "%3C")
-                    .replaceAll("`", "%60")
-                    .replaceAll(" ", "%20");
-            int qMarkIx = encodedUrl.indexOf('?');
-            if (qMarkIx > 0) {
-                encodedUrl = encodedUrl.substring(0, qMarkIx) + encodedUrl.substring(qMarkIx).replaceAll(":", "%3A");
-            }
+        if (StringUtils.isNotEmpty(url)) {
+            try {
+                String unescapedURL = URLDecoder.decode(url, StandardCharsets.UTF_8.name());
+                /*
+                    StringEscapeUtils is deprecated starting with version 3.6 of commons-lang3, however the indicated replacement comes from
+                    commons-text, which is not an OSGi bundle
+                */
+                unescapedURL = StringEscapeUtils.unescapeXml(unescapedURL);
+                // Percent-encode characters that are not allowed in unquoted
+                // HTML attributes: ", ', >, <, ` and space. We don't encode =
+                // since this would break links with query parameters.
+                String encodedUrl = unescapedURL.replaceAll("\"", "%22")
+                        .replaceAll("'", "%27")
+                        .replaceAll(">", "%3E")
+                        .replaceAll("<", "%3C")
+                        .replaceAll("`", "%60")
+                        .replaceAll(" ", "%20");
+                int qMarkIx = encodedUrl.indexOf('?');
+                if (qMarkIx > 0) {
+                    encodedUrl = encodedUrl.substring(0, qMarkIx) + encodedUrl.substring(qMarkIx).replaceAll(":", "%3A");
+                }
 
-            encodedUrl = mangleNamespaces(encodedUrl);
-            if (xssFilter.isValidHref(encodedUrl)) {
-                return encodedUrl;
+                encodedUrl = mangleNamespaces(encodedUrl);
+                if (xssFilter.isValidHref(encodedUrl)) {
+                    return encodedUrl;
+                }
+            } catch (UnsupportedEncodingException e) {
+                LOGGER.error("Unable to decode url: {}.", url);
             }
         }
-
         // fall through to empty string
         return "";
     }
diff --git a/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java b/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java
index f395d01..2c5571e 100644
--- a/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java
+++ b/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java
@@ -17,6 +17,9 @@
 package org.apache.sling.xss.impl;
 
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -26,6 +29,8 @@
 
 import javax.annotation.Nonnull;
 
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.sling.api.resource.LoginException;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ResourceResolver;
@@ -120,6 +125,27 @@
 
     @Override
     public boolean isValidHref(String url) {
+        if (StringUtils.isEmpty(url)) {
+            return true;
+        }
+        try {
+            String decodedURL = URLDecoder.decode(url, StandardCharsets.UTF_8.name());
+            /*
+                StringEscapeUtils is deprecated starting with version 3.6 of commons-lang3, however the indicated replacement comes from
+                commons-text, which is not an OSGi bundle
+             */
+            String xmlDecodedURL = StringEscapeUtils.unescapeXml(decodedURL);
+            if (xmlDecodedURL.equals(url) || xmlDecodedURL.equals(decodedURL)) {
+                return runHrefValidation(url);
+            }
+            return runHrefValidation(xmlDecodedURL);
+        } catch (UnsupportedEncodingException e) {
+            logger.error("Unable to decode url: {}.", url);
+        }
+        return false;
+    }
+
+    private boolean runHrefValidation(@Nonnull String url) {
         // Same logic as in org.owasp.validator.html.scan.MagicSAXFilter.startElement()
         boolean isValid = hrefAttribute.containsAllowedValue(url.toLowerCase());
         if (!isValid) {
diff --git a/src/test/java/org/apache/sling/xss/impl/XSSAPIImplTest.java b/src/test/java/org/apache/sling/xss/impl/XSSAPIImplTest.java
index 9590dc8..e0a8ba4 100644
--- a/src/test/java/org/apache/sling/xss/impl/XSSAPIImplTest.java
+++ b/src/test/java/org/apache/sling/xss/impl/XSSAPIImplTest.java
@@ -217,6 +217,19 @@
         String[][] testData = {
                 //         Href                                        Expected Result
                 //
+                {
+                    "/base?backHref=%26%23x6a%3b%26%23x61%3b%26%23x76%3b%26%23x61%3b%26%23x73%3b%26%23x63%3b%26%23x72%3b%26%23x69%3b%26%23x70%3b%26%23x74%3b%26%23x3a%3balert%281%29",
+                    "/base?backHref=javascript%3Aalert(1)"
+                },
+                {
+                    "%26%23x6a%3b%26%23x61%3b%26%23x76%3b%26%23x61%3b%26%23x73%3b%26%23x63%3b%26%23x72%3b%26%23x69%3b%26%23x70%3b%26%23x74%3b%26%23x3a%3balert%281%29",
+                    ""
+                },
+                {
+                    "&#x6a;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3a;alert(1)",
+                    ""
+                },
+                {"%2Fscripts%2Ftest.js", "/scripts/test.js"},
                 {"/etc/commerce/collections/中文", "/etc/commerce/collections/中文"},
                 {"/etc/commerce/collections/\u09aa\u09b0\u09c0\u0995\u09cd\u09b7\u09be\u09ae\u09c2\u09b2\u0995", "/etc/commerce/collections/\u09aa\u09b0\u09c0\u0995\u09cd\u09b7\u09be\u09ae\u09c2\u09b2\u0995"},
                 {null, ""},
diff --git a/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java b/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java
new file mode 100644
index 0000000..b628184
--- /dev/null
+++ b/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java
@@ -0,0 +1,67 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import org.apache.sling.serviceusermapping.ServiceUserMapped;
+import org.apache.sling.testing.mock.sling.junit.SlingContext;
+import org.apache.sling.xss.XSSFilter;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.mock;
+
+public class XSSFilterImplTest {
+
+    @Rule
+    public SlingContext context = new SlingContext();
+
+    private XSSFilter xssFilter;
+
+    @Before
+    public void setUp() {
+        context.registerService(ServiceUserMapped.class, mock(ServiceUserMapped.class));
+        context.registerInjectActivateService(new XSSFilterImpl());
+        xssFilter = context.getService(XSSFilter.class);
+    }
+
+    @After
+    public void tearDown() {
+        xssFilter = null;
+    }
+
+    @Test
+    public void isValidHref() {
+        checkIsValid("javascript:alert(1)", false);
+        checkIsValid("", true);
+        checkIsValid("%26%23x6a%3b%26%23x61%3b%26%23x76%3b%26%23x61%3b%26%23x73%3b%26%23x63%3b%26%23x72%3b%26%23x69%3b%26%23x70%3b%26%23x74%3b%26%23x3a%3balert%281%29", false);
+        checkIsValid("&#x6a;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3a;alert(1)", false);
+    }
+
+    private void checkIsValid(String input, boolean valid) {
+        if (valid) {
+            assertTrue("Expected valid href value for: " + input, xssFilter.isValidHref(input));
+        } else {
+            assertFalse("Expected invalid href value for: " + input, xssFilter.isValidHref(input));
+        }
+    }
+
+}