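Updated HtmlCleaner to v2.7 and removed redundant code (Html5DoctypeToken and the fixHTML5Doctype workaround, the deprecated HtmlSerializer and its test, and unused StartPageProcessor members). See WOOKIE-428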
git-svn-id: https://svn.apache.org/repos/asf/wookie/trunk@1574106 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/wookie-server/pom.xml b/wookie-server/pom.xml
index a6d7efc..f58101e 100644
--- a/wookie-server/pom.xml
+++ b/wookie-server/pom.xml
@@ -146,7 +146,7 @@
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
- <version>2.2.1</version>
+ <version>2.7</version>
<scope>compile</scope>
</dependency>
<dependency>
diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/Html5DoctypeToken.java b/wookie-server/src/main/java/org/apache/wookie/util/html/Html5DoctypeToken.java
deleted file mode 100644
index fa536bc..0000000
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/Html5DoctypeToken.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.wookie.util.html;
-
-import org.htmlcleaner.DoctypeToken;
-
-/**
- *
- * An extended HTML Cleaner DocTypeToken class to deal with HTML5 declarations better then the default, which displays emtpy strings and nulls.
- *
- * Note: <!DOCTYPE html SYSTEM "about:legacy-compat"> is also a valid HTML5 doctype - but html cleaner only makes the html
- * into uppercase, which although is still not correct, doesn't seem to cause problems in wookie at present.
- *
- * http://sourceforge.net/tracker/?func=detail&aid=3190583&group_id=183053&atid=903696
- *
- */
-public class Html5DoctypeToken extends DoctypeToken {
-
- public static String BADDOCTYPE = "<!DOCTYPE HTML null \"\">";
- public static String GOODDOCTYPE = "<!DOCTYPE html>";
-
- public Html5DoctypeToken(String part1, String part2, String part3,
- String part4) {
- super(part1, part2, part3, part4);
- }
-
- public String getContent(){
- return GOODDOCTYPE;
- }
-
-}
diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java b/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java
index 96e6bf7..a15d26f 100644
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java
+++ b/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java
@@ -20,7 +20,6 @@
import java.util.List;
import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.DoctypeToken;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlcleaner.TagNode;
@@ -85,7 +84,6 @@
// remove widget-specific scripts. These will be replaced
// after processing, so that the injected scripts come first
removeUserScripts();
- fixHTML5Doctype();
}
/* (non-Javadoc)
@@ -112,7 +110,6 @@
/* (non-Javadoc)
* @see org.apache.wookie.util.html.IHtmlProcessor#setCharset(java.lang.String)
*/
- @SuppressWarnings("unchecked")
public void setTypeAndCharset(String type, String charset) {
// NB This overrides any existing encoding information in the HTML file.
@@ -124,7 +121,7 @@
if (meta == null) {
meta = new TagNode(META_TAG);
meta.addAttribute("http-equiv", "Content-Type");
- headNode.getChildren().add(0, meta);
+ headNode.addChild(meta);
}
//
// Force UTF into lowercase
@@ -165,7 +162,6 @@
* Finds any user script imports and saves them to
* the scriptList
*/
- @SuppressWarnings("unchecked")
private void getUserScripts(){
List<TagNode> children = headNode.getChildTagList();
for(TagNode child : children){
@@ -183,19 +179,5 @@
headNode.addChild(node);
}
}
-
- /**
- * Fix for a bug in HTMLCleaner which cannot handle HTML5 doctypes correctly
- * See http://sourceforge.net/tracker/?func=detail&aid=3190583&group_id=183053&atid=903696
- */
- private void fixHTML5Doctype(){
- DoctypeToken docType = htmlNode.getDocType();
- if(docType != null){
- if(docType.getContent().equalsIgnoreCase(Html5DoctypeToken.BADDOCTYPE)){
- Html5DoctypeToken newToken = new Html5DoctypeToken("html",null,null,null);
- htmlNode.setDocType(newToken);
- }
- }
- }
}
diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlSerializer.java b/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlSerializer.java
deleted file mode 100644
index d2b50ff..0000000
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlSerializer.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.wookie.util.html;
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import org.htmlcleaner.BaseToken;
-import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.ContentNode;
-import org.htmlcleaner.TagNode;
-import org.htmlcleaner.XmlSerializer;
-
-/**
- * This is a custom serializer for HtmlCleaner that does not escape the content of
- * event handler attributes such as "onClick". In other respects it is identical
- * to SimpleXmlSerializer.
- */
-@Deprecated
-public class HtmlSerializer extends XmlSerializer {
-
- /**
- * The set of HTML event handler attributes
- */
- private static final String[] EVENT_HANDLERS = {"onabort","onbeforeunload","onblur","onchange","oncontextmenu","onclick","ondblclick","ondragdrop","ondrag","ondragend","ondragenter","ondragleave","ondragover","ondragstart","ondrop","onerror","onfocus","onkeydown","onkeypress","onkeyup","onload","onmessage","onmousedown","onmouseup","onmousemove","onmouseout","onmouseover","onmouseup","onmousewheel","onmove","onreset","onresize","onscroll","onselect","onstorage","onsubmit","onunload" };
-
- public HtmlSerializer(CleanerProperties props){
- super(props);
- }
-
- /**
- * Checks to see if an attribute should have its value escaped
- * @param attname the attribute name
- * @return true if the attribute shouldn't be escaped, otherwise false
- */
- protected boolean dontEscapeAttribute(String attname){
- for (String handler:EVENT_HANDLERS) if (handler.equalsIgnoreCase(attname)) return true;
- return false;
- }
-
- /**
- * We only override two lines of this method - see below
- */
- @Override
- protected void serializeOpenTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
- String tagName = tagNode.getName();
- Map tagAtttributes = tagNode.getAttributes();
-
- writer.write("<" + tagName);
- Iterator it = tagAtttributes.entrySet().iterator();
- while (it.hasNext()) {
- Map.Entry entry = (Map.Entry) it.next();
- String attName = (String) entry.getKey();
- String attValue = (String) entry.getValue();
-
- if ( !props.isNamespacesAware() && ("xmlns".equals(attName) || attName.startsWith("xmlns:")) ) {
- continue;
- }
- // This is a line we've changed
- writer.write(" " + attName + "=\"" + (dontEscapeAttribute(attName)? attValue: escapeXml(attValue)) + "\"");
- }
-
- if ( isMinimizedTagSyntax(tagNode) ) {
- writer.write(" />");
- if (newLine) {
- writer.write("\n");
- }
- } else if (dontEscape(tagNode)) {
- // And so is this
- writer.write(">");
- } else {
- writer.write(">");
- }
- }
-
- @Override
- protected void serializeEndTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
- String tagName = tagNode.getName();
-
- // Lets not bother with this shall we?
- //if (dontEscape(tagNode)) {
- // writer.write("]]>");
- //}
-
- writer.write( "</" + tagName + ">" );
-
- if (newLine) {
- writer.write("\n");
- }
- }
-
- /**
- * This is exactly the same as SimpleXmlSerializer.serialize, however we have to include it here as it would
- * inherit from XmlSerializer and miss out our custom dontEscapeAttribute method
- */
- @Override
- protected void serialize(TagNode tagNode, Writer writer) throws IOException {
- serializeOpenTag(tagNode, writer, false);
-
- List tagChildren = tagNode.getChildren();
- if ( !isMinimizedTagSyntax(tagNode) ) {
- Iterator childrenIt = tagChildren.iterator();
- while ( childrenIt.hasNext() ) {
- Object item = childrenIt.next();
- if (item != null) {
- if ( item instanceof ContentNode ) {
- String content = ((ContentNode) item).getContent().toString();
- writer.write( dontEscape(tagNode) ? content.replaceAll("]]>", "]]>") : escapeXml(content) );
- } else {
- ((BaseToken)item).serialize(this, writer);
- }
- }
- }
- serializeEndTag(tagNode, writer, false);
- }
- }
-
-}
diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java b/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java
index bd6e26c..62accdd 100644
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java
+++ b/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java
@@ -28,8 +28,6 @@
*/
public class StartPageProcessor implements IStartPageProcessor {
- static final String DEFAULT_FEATURE = "http://wookie.apache.org/feature/default";
-
/* (non-Javadoc)
* @see org.apache.wookie.util.html.IStartPageProcessor#processStartFile(java.io.File, org.apache.wookie.w3c.IManifestModel)
*/
@@ -85,19 +83,6 @@
}
/**
- * Instantiates a feature for a given feature name
- * @param featureName the name of the feature to be instantiated
- * @return an IFeature instance
- * @throws Exception if the feature cannot be instantiated
- */
- @SuppressWarnings("unchecked")
- private IFeature getFeatureInstanceForName(String featureName) throws Exception{
- Class<? extends IFeature> klass = (Class<? extends IFeature>) Class.forName(featureName);
- IFeature theFeature = (IFeature) klass.newInstance();
- return theFeature;
- }
-
- /**
* Adds scripts for a given feature
* @param engine
* @param feature
diff --git a/wookie-server/src/test/java/org/apache/wookie/tests/HtmlSerializerTest.java b/wookie-server/src/test/java/org/apache/wookie/tests/HtmlSerializerTest.java
deleted file mode 100644
index 77e49aa..0000000
--- a/wookie-server/src/test/java/org/apache/wookie/tests/HtmlSerializerTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.wookie.tests;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.io.StringWriter;
-
-import org.apache.wookie.util.html.HtmlSerializer;
-import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.HtmlCleaner;
-import org.htmlcleaner.TagNode;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class HtmlSerializerTest {
-
- static HtmlCleaner cleaner;
- static CleanerProperties properties;
-
- @BeforeClass
- public static void setup(){
- cleaner = new org.htmlcleaner.HtmlCleaner();
- // set cleaner properties
- properties = cleaner.getProperties();
- properties.setOmitDoctypeDeclaration(false);
- properties.setOmitXmlDeclaration(true);
- properties.setUseCdataForScriptAndStyle(true);
- properties.setUseEmptyElementTags(false);
- }
-
- private String parse(String content){
- StringWriter writer = new StringWriter();
- HtmlSerializer ser = new HtmlSerializer(properties);
- try {
- TagNode html = cleaner.clean(content);
- ser.writeXml(html, writer, "UTF-8");
- return writer.getBuffer().toString();
- } catch (IOException e) {
- return null;
- }
- }
-
- // tests the content of event handlers are not escaped
- @Test
- public void eventHandlerAttribute(){
- String out = parse("<body onload=\"$('#projects').dataTable();\">");
- assertEquals("<html><head></head><body onload=\"$('#projects').dataTable();\"></body></html>", out);
- }
-
- // tests that other attributes do have content escaped
- @Test
- public void otherAttr(){
- String out = parse("<body class=\"$('#projects').dataTable();\">");
- assertEquals("<html><head></head><body class=\"$('#projects').dataTable();\"></body></html>", out);
- }
-
- // tests that script tags are not encoded
- @Test
- public void scriptTag(){
- String out = parse("<script>$('#projects').dataTable();</script>");
- assertEquals("<html><head></head><body><script>$('#projects').dataTable();</script></body></html>", out);
- }
-
- // tests that non-script tags are encoded
- @Test
- public void sillyTag(){
- String out = parse("<silly>$('#projects').dataTable();</silly>");
- assertEquals("<html><head></head><body><silly>$('#projects').dataTable();</silly></body></html>", out);
- }
-
- // TODO tests for inline CSS
-
-}