SLING-9613 - java.lang.StackOverflowError in XSSFilterImpl.filter for long URLs

* added a fallback mechanism that should rerun the filtering with
simplified regular expressions for validating URLs
diff --git a/src/main/java/org/apache/sling/xss/impl/FallbackATag.java b/src/main/java/org/apache/sling/xss/impl/FallbackATag.java
new file mode 100644
index 0000000..a94de65
--- /dev/null
+++ b/src/main/java/org/apache/sling/xss/impl/FallbackATag.java
@@ -0,0 +1,79 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+
+import org.owasp.validator.html.model.Attribute;
+import org.owasp.validator.html.model.Tag;
+
+public class FallbackATag extends Tag {
+
+    static final Attribute FALLBACK_HREF_ATTRIBUTE = new Attribute(
+            "href",
+            Arrays.asList(
+                    XSSFilterImpl.ON_SITE_SIMPLIFIED,
+                    XSSFilterImpl.OFF_SITE_SIMPLIFIED
+            ),
+            Collections.emptyList(),
+            "removeAttribute", ""
+    );
+
+    private final Tag wrapped;
+
+    public FallbackATag(Tag wrapped) {
+        super("a", new HashMap<>(), "validate");
+        this.wrapped = wrapped;
+    }
+
+    @Override
+    public String getAction() {
+        return wrapped.getAction();
+    }
+
+    @Override
+    public boolean isAction(String action) {
+        return wrapped.isAction(action);
+    }
+
+    @Override
+    public Tag mutateAction(String action) {
+        return wrapped.mutateAction(action);
+    }
+
+    @Override
+    public String getRegularExpression() {
+        return wrapped.getRegularExpression();
+    }
+
+    @Override
+    public String getName() {
+        return wrapped.getName();
+    }
+
+    @Override
+    public Attribute getAttributeByName(String name) {
+        if ("href".equalsIgnoreCase(name)) {
+            return FALLBACK_HREF_ATTRIBUTE;
+        }
+        return wrapped.getAttributeByName(name);
+    }
+}
diff --git a/src/main/java/org/apache/sling/xss/impl/FallbackSlingPolicy.java b/src/main/java/org/apache/sling/xss/impl/FallbackSlingPolicy.java
new file mode 100644
index 0000000..ddf4d0d
--- /dev/null
+++ b/src/main/java/org/apache/sling/xss/impl/FallbackSlingPolicy.java
@@ -0,0 +1,55 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.io.InputStream;
+
+import org.owasp.validator.html.InternalPolicy;
+import org.owasp.validator.html.PolicyException;
+import org.owasp.validator.html.model.Tag;
+import org.xml.sax.InputSource;
+
+public class FallbackSlingPolicy extends InternalPolicy {
+
+    private FallbackATag fallbackATag;
+    private final Object aTagLock = new Object();
+
+    public FallbackSlingPolicy(InputStream inputStream) throws PolicyException {
+       super(null, getSimpleParseContext(getTopLevelElement(new InputSource(inputStream))));
+
+    }
+
+    @Override
+    public Tag getTagByLowercaseName(String tagName) {
+        if ("a".equalsIgnoreCase(tagName)) {
+            synchronized (aTagLock) {
+                if (fallbackATag == null) {
+                    Tag wrapped = super.getTagByLowercaseName(tagName);
+                    if (wrapped != null) {
+                        fallbackATag = new FallbackATag(wrapped);
+                    }
+                }
+            }
+            if (fallbackATag != null) {
+                return fallbackATag;
+            }
+        }
+        return super.getTagByLowercaseName(tagName);
+    }
+}
diff --git a/src/main/java/org/apache/sling/xss/impl/HtmlToHtmlContentContext.java b/src/main/java/org/apache/sling/xss/impl/HtmlToHtmlContentContext.java
index 1a2f65b..7e0535a 100644
--- a/src/main/java/org/apache/sling/xss/impl/HtmlToHtmlContentContext.java
+++ b/src/main/java/org/apache/sling/xss/impl/HtmlToHtmlContentContext.java
@@ -49,12 +49,8 @@
             try {
                 Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                 return policyHandler.getAntiSamy().scan(str).getNumberOfErrors() == 0;
-            } catch (final ScanException se) {
-                log.warn("Unable to scan input.", se);
-                log.debug("Provided input: {}", str);
-            } catch (final PolicyException pe) {
-                log.warn("Unable to check input.", pe);
-                log.debug("Provided input: {}", str);
+            } catch (final Exception se) {
+                logError(se, str);
             } finally {
                 Thread.currentThread().setContextClassLoader(tccl);
             }
@@ -70,24 +66,18 @@
         if (StringUtils.isNotEmpty(str)) {
             ClassLoader tccl = Thread.currentThread().getContextClassLoader();
             try {
-                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
-                log.debug("Protecting (HTML -> HTML) :\n{}", str);
-                final CleanResults results = policyHandler.getAntiSamy().scan(str);
-                final String cleaned = results.getCleanHTML();
-                @SuppressWarnings("unchecked")
-                final List<String> errors = results.getErrorMessages();
-                for (final String error : errors) {
-                    log.info("AntiSamy warning: {}", error);
+                final CleanResults  results = getCleanResults(policyHandler, str);
+                if (results != null) {
+                    final String cleaned = results.getCleanHTML();
+                    final List<String> errors = results.getErrorMessages();
+                    for (final String error : errors) {
+                        log.info("AntiSamy warning: {}", error);
+                    }
+                    log.debug("Protected (HTML -> HTML):\n{}", cleaned);
+                    return cleaned;
                 }
-                log.debug("Protected (HTML -> HTML):\n{}", cleaned);
-
-                return cleaned;
-            } catch (final ScanException se) {
-                log.warn("Unable to scan input.", se);
-                log.debug("Provided input: {}", str);
-            } catch (final PolicyException pe) {
-                log.warn("Unable to check input.", pe);
-                log.debug("Provided input: {}", str);
+            } catch (Exception e) {
+                logError(e, str);
             } finally {
                 Thread.currentThread().setContextClassLoader(tccl);
             }
@@ -102,4 +92,21 @@
     public boolean supportsPolicy() {
         return true;
     }
+
+    private CleanResults getCleanResults(PolicyHandler handler, String input) throws ScanException, PolicyException {
+        CleanResults results;
+        try {
+            results = handler.getAntiSamy().scan(input);
+        } catch (StackOverflowError e) {
+            log.debug("Will perform a second attempt at filtering the following input due to a StackOverflowError:\n{}", input);
+            results = handler.getFallbackAntiSamy().scan(input);
+            log.debug("Second attempt was successful.");
+        }
+        return results;
+    }
+
+    private void logError(Exception e, String input) {
+        log.warn("Unable to check input.", e);
+        log.debug("Provided input: {}", input);
+    }
 }
diff --git a/src/main/java/org/apache/sling/xss/impl/PolicyHandler.java b/src/main/java/org/apache/sling/xss/impl/PolicyHandler.java
index de2f0c4..5caf40a 100644
--- a/src/main/java/org/apache/sling/xss/impl/PolicyHandler.java
+++ b/src/main/java/org/apache/sling/xss/impl/PolicyHandler.java
@@ -16,9 +16,11 @@
  ******************************************************************************/
 package org.apache.sling.xss.impl;
 
-import java.io.IOException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.InputStream;
 
+import org.apache.commons.io.IOUtils;
 import org.owasp.validator.html.AntiSamy;
 import org.owasp.validator.html.Policy;
 
@@ -27,8 +29,10 @@
  */
 public class PolicyHandler {
 
-    private Policy policy;
+    private final Policy policy;
+    private Policy fallbackPolicy;
     private AntiSamy antiSamy;
+    private AntiSamy fallbackAntiSamy;
 
     /**
      * Creates a {@code PolicyHandler} from an {@link InputStream}.
@@ -40,18 +44,16 @@
         // (currently: http://bugs.day.com/bugzilla/show_bug.cgi?id=31946)
         Thread currentThread = Thread.currentThread();
         ClassLoader cl = currentThread.getContextClassLoader();
-        try {
+        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+            IOUtils.copy(policyStream, baos);
+            ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
             currentThread.setContextClassLoader(this.getClass().getClassLoader());
-            this.policy = Policy.getInstance(policyStream);
+            this.policy = Policy.getInstance(bais);
+            bais.reset();
+            this.fallbackPolicy = new FallbackSlingPolicy(bais);
             this.antiSamy = new AntiSamy(this.policy);
+            this.fallbackAntiSamy = new AntiSamy(this.fallbackPolicy);
         } finally {
-            if (policyStream != null) {
-                try {
-                    policyStream.close();
-                } catch (final IOException ioe) {
-                    // ignored as we can't do anything about this (besides logging)
-                }
-            }
             currentThread.setContextClassLoader(cl);
         }
     }
@@ -63,4 +65,8 @@
     public AntiSamy getAntiSamy() {
         return this.antiSamy;
     }
+
+    public AntiSamy getFallbackAntiSamy() {
+        return fallbackAntiSamy;
+    }
 }
diff --git a/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java b/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java
index 79b961e..d11d18e 100644
--- a/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java
+++ b/src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java
@@ -131,9 +131,9 @@
     public static final String RELATIVE_REF = "(?!\\s*javascript(?::|&colon;))" + RELATIVE_PART + "?(?:\\?" + QUERY + ")?(?:#" + FRAGMENT + ")?";
     public static final String URI = SCHEME_PATTERN + ":" + HIER_PART + "(?:\\?" + QUERY + ")?(?:#" + FRAGMENT + ")?";
 
-    private static final Pattern ON_SITE_SIMPLIFIED = Pattern.compile("([\\p{L}\\p{N}\\\\\\.\\#@\\$%\\+&amp;;:\\-_~,\\?=/!\\*\\(\\)]*|\\#" +
+    static final Pattern ON_SITE_SIMPLIFIED = Pattern.compile("([\\p{L}\\p{N}\\\\\\.\\#@\\$%\\+&amp;;:\\-_~,\\?=/!\\*\\(\\)]*|\\#" +
             "(\\w)+)");
-    private static final Pattern OFF_SITE_SIMPLIFIED = Pattern.compile("(\\s)*((ht|f)tp(s?)://|mailto:)" +
+    static final Pattern OFF_SITE_SIMPLIFIED = Pattern.compile("(\\s)*((ht|f)tp(s?)://|mailto:)" +
             "[\\p{L}\\p{N}]+[\\p{L}\\p{N}\\p{Zs}\\.\\#@\\$%\\+&amp;;:\\-_~,\\?=/!\\*\\(\\)]*(\\s)*");
 
     private static final Pattern[] BACKUP_PATTERNS = new Pattern[] {ON_SITE_SIMPLIFIED, OFF_SITE_SIMPLIFIED};
@@ -151,7 +151,7 @@
                     Pattern.compile(RELATIVE_REF),
                     Pattern.compile(URI)
             ),
-            Collections.<String>emptyList(),
+            Collections.emptyList(),
             "removeAttribute", ""
     );
 
diff --git a/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java b/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java
index 4c6ead3..470b374 100644
--- a/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java
+++ b/src/test/java/org/apache/sling/xss/impl/XSSFilterImplTest.java
@@ -31,6 +31,7 @@
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.anyString;
 import static org.mockito.Mockito.mock;
@@ -95,6 +96,16 @@
         checkIsValid("#\">", false);
     }
 
+    @Test
+    public void testFallbackFiltering() {
+        final String longURLContext = "<a href=\"https://sling.apache.org" +
+                "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.\">Click</a>";
+        context.registerInjectActivateService(new XSSFilterImpl());
+        xssFilter = context.getService(XSSFilter.class);
+        assertNotNull(xssFilter);
+        assertEquals(longURLContext, xssFilter.filter(longURLContext));
+    }
+
     private void checkIsValid(String input, boolean valid) {
         if (valid) {
             assertTrue("Expected valid href value for: " + input, xssFilter.isValidHref(input));