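Updated HtmlCleaner to v2.7 and removed code made redundant by the upgrade or no longer used: the HTML5 doctype workaround (Html5DoctypeToken, fixHTML5Doctype), the deprecated custom HtmlSerializer and its test, and the leftover feature-instantiation code in StartPageProcessor. See WOOKIE-428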

git-svn-id: https://svn.apache.org/repos/asf/wookie/trunk@1574106 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/wookie-server/pom.xml b/wookie-server/pom.xml
index a6d7efc..f58101e 100644
--- a/wookie-server/pom.xml
+++ b/wookie-server/pom.xml
@@ -146,7 +146,7 @@
     <dependency>

       <groupId>net.sourceforge.htmlcleaner</groupId>

       <artifactId>htmlcleaner</artifactId>

-      <version>2.2.1</version>

+      <version>2.7</version>

       <scope>compile</scope>

     </dependency>

     <dependency>

diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/Html5DoctypeToken.java b/wookie-server/src/main/java/org/apache/wookie/util/html/Html5DoctypeToken.java
deleted file mode 100644
index fa536bc..0000000
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/Html5DoctypeToken.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*

- *  Licensed under the Apache License, Version 2.0 (the "License");

- *  you may not use this file except in compliance with the License.

- *  You may obtain a copy of the License at

- *

- *      http://www.apache.org/licenses/LICENSE-2.0

- *

- *  Unless required by applicable law or agreed to in writing, software

- *  distributed under the License is distributed on an "AS IS" BASIS,

- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- *  See the License for the specific language governing permissions and

- * limitations under the License.

- */

-package org.apache.wookie.util.html;

-

-import org.htmlcleaner.DoctypeToken;

-

-/**

- * 

- * An extended HTML Cleaner DocTypeToken class to deal with HTML5 declarations better then the default, which displays emtpy strings and nulls.

- * 

- * Note: <!DOCTYPE html SYSTEM "about:legacy-compat"> is also a valid HTML5 doctype - but html cleaner only makes the html

- * into uppercase, which although is still not correct, doesn't seem to cause problems in wookie at present.

- * 

- * http://sourceforge.net/tracker/?func=detail&aid=3190583&group_id=183053&atid=903696

- * 

- */

-public class Html5DoctypeToken extends DoctypeToken {

-	

-	public static String BADDOCTYPE = "<!DOCTYPE HTML null \"\">";

-	public static String GOODDOCTYPE = "<!DOCTYPE html>";

-

-	public Html5DoctypeToken(String part1, String part2, String part3,

-			String part4) {

-		super(part1, part2, part3, part4);		

-	}

-		

-	public String getContent(){

-		return GOODDOCTYPE;

-	}

-

-}

diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java b/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java
index 96e6bf7..a15d26f 100644
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java
+++ b/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlCleaner.java
@@ -20,7 +20,6 @@
 import java.util.List;
 
 import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.DoctypeToken;
 import org.htmlcleaner.SimpleHtmlSerializer;
 import org.htmlcleaner.TagNode;
 
@@ -85,7 +84,6 @@
 		// remove widget-specific scripts. These will be replaced
 		// after processing, so that the injected scripts come first
 		removeUserScripts();
-		fixHTML5Doctype();
 	}
 	
 	/* (non-Javadoc)
@@ -112,7 +110,6 @@
   /* (non-Javadoc)
    * @see org.apache.wookie.util.html.IHtmlProcessor#setCharset(java.lang.String)
    */
-  @SuppressWarnings("unchecked")
   public void setTypeAndCharset(String type, String charset) {
     // NB This overrides any existing encoding information in the HTML file.
     
@@ -124,7 +121,7 @@
     if (meta == null) {
       meta = new TagNode(META_TAG);
       meta.addAttribute("http-equiv", "Content-Type");
-      headNode.getChildren().add(0, meta);
+      headNode.addChild(meta);
     }
     //
     // Force UTF into lowercase
@@ -165,7 +162,6 @@
 	 * Finds any user script imports and saves them to
 	 * the scriptList
 	 */
-	@SuppressWarnings("unchecked")
 	private void getUserScripts(){
 		List<TagNode> children = headNode.getChildTagList();		
 		for(TagNode child : children){						
@@ -183,19 +179,5 @@
 			headNode.addChild(node);
 		}
 	}
-	
-	/**
-	 *  Fix for a bug in HTMLCleaner which cannot handle HTML5 doctypes correctly
-	 *  See http://sourceforge.net/tracker/?func=detail&aid=3190583&group_id=183053&atid=903696
-	 */
-	private void fixHTML5Doctype(){
-		DoctypeToken docType = htmlNode.getDocType();
-		if(docType != null){
-			if(docType.getContent().equalsIgnoreCase(Html5DoctypeToken.BADDOCTYPE)){
-				Html5DoctypeToken newToken = new Html5DoctypeToken("html",null,null,null);
-				htmlNode.setDocType(newToken);
-			}
-		}
-	}
 
 }
diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlSerializer.java b/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlSerializer.java
deleted file mode 100644
index d2b50ff..0000000
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/HtmlSerializer.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.wookie.util.html;
-
-import java.io.IOException;
-import java.io.Writer;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import org.htmlcleaner.BaseToken;
-import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.ContentNode;
-import org.htmlcleaner.TagNode;
-import org.htmlcleaner.XmlSerializer;
-
-/**
- * This is a custom serializer for HtmlCleaner that does not escape the content of
- * event handler attributes such as "onClick". In other respects it is identical 
- * to SimpleXmlSerializer.
- */
-@Deprecated
-public class HtmlSerializer extends XmlSerializer {
-	
-	/**
-	 * The set of HTML event handler attributes
-	 */
-	private static final String[] EVENT_HANDLERS = {"onabort","onbeforeunload","onblur","onchange","oncontextmenu","onclick","ondblclick","ondragdrop","ondrag","ondragend","ondragenter","ondragleave","ondragover","ondragstart","ondrop","onerror","onfocus","onkeydown","onkeypress","onkeyup","onload","onmessage","onmousedown","onmouseup","onmousemove","onmouseout","onmouseover","onmouseup","onmousewheel","onmove","onreset","onresize","onscroll","onselect","onstorage","onsubmit","onunload" };
-
-	public HtmlSerializer(CleanerProperties props){
-		super(props);
-	}
-	
-	/**
-	 * Checks to see if an attribute should have its value escaped 
-	 * @param attname the attribute name
-	 * @return true if the attribute shouldn't be escaped, otherwise false
-	 */
-	protected boolean dontEscapeAttribute(String attname){
-		for (String handler:EVENT_HANDLERS) if (handler.equalsIgnoreCase(attname)) return true;
-		return false;
-	}
-	
-	/**
-	 * We only override two lines of this method - see below
-	 */
-	@Override
-    protected void serializeOpenTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
-        String tagName = tagNode.getName();
-        Map tagAtttributes = tagNode.getAttributes();
-        
-        writer.write("<" + tagName);
-        Iterator it = tagAtttributes.entrySet().iterator();
-        while (it.hasNext()) {
-            Map.Entry entry = (Map.Entry) it.next();
-            String attName = (String) entry.getKey();
-            String attValue = (String) entry.getValue();
-            
-            if ( !props.isNamespacesAware() && ("xmlns".equals(attName) || attName.startsWith("xmlns:")) ) {
-            	continue;
-            }
-            // This is a line we've changed
-            writer.write(" " + attName + "=\"" + (dontEscapeAttribute(attName)? attValue: escapeXml(attValue)) + "\"");
-        }
-        
-        if ( isMinimizedTagSyntax(tagNode) ) {
-        	writer.write(" />");
-        	if (newLine) {
-        		writer.write("\n");
-        	}
-        } else if (dontEscape(tagNode)) {
-        	// And so is this
-        	writer.write(">");
-        } else {
-        	writer.write(">");
-        }
-    }
-	
-	@Override
-    protected void serializeEndTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
-    	String tagName = tagNode.getName();
-    	
-    	// Lets not bother with this shall we?
-    	//if (dontEscape(tagNode)) {
-    	//	writer.write("]]>");
-    	//}
-    	
-    	writer.write( "</" + tagName + ">" );
-
-        if (newLine) {
-    		writer.write("\n");
-    	}
-    }
-
-	/**
-	 * This is exactly the same as SimpleXmlSerializer.serialize, however we have to include it here as it would
-	 * inherit from XmlSerializer and miss out our custom dontEscapeAttribute method
-	 */
-	@Override
-	protected void serialize(TagNode tagNode, Writer writer) throws IOException {
-        serializeOpenTag(tagNode, writer, false);
-
-        List tagChildren = tagNode.getChildren();
-        if ( !isMinimizedTagSyntax(tagNode) ) {
-            Iterator childrenIt = tagChildren.iterator();
-            while ( childrenIt.hasNext() ) {
-                Object item = childrenIt.next();
-                if (item != null) {
-                    if ( item instanceof ContentNode ) {
-                        String content = ((ContentNode) item).getContent().toString();
-                        writer.write( dontEscape(tagNode) ? content.replaceAll("]]>", "]]&gt;") : escapeXml(content) );
-                    } else {
-                        ((BaseToken)item).serialize(this, writer);
-                    }
-                }
-            }
-            serializeEndTag(tagNode, writer, false);
-        }
-    }
-
-}
diff --git a/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java b/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java
index bd6e26c..62accdd 100644
--- a/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java
+++ b/wookie-server/src/main/java/org/apache/wookie/util/html/StartPageProcessor.java
@@ -28,8 +28,6 @@
  */
 public class StartPageProcessor implements IStartPageProcessor {
 	
-	static final String DEFAULT_FEATURE = "http://wookie.apache.org/feature/default";
-
 	/* (non-Javadoc)
 	 * @see org.apache.wookie.util.html.IStartPageProcessor#processStartFile(java.io.File, org.apache.wookie.w3c.IManifestModel)
 	 */
@@ -85,19 +83,6 @@
 	}
 	
 	/**
-	 * Instantiates a feature for a given feature name
-	 * @param featureName the name of the feature to be instantiated
-	 * @return an IFeature instance
-	 * @throws Exception if the feature cannot be instantiated
-	 */
-	@SuppressWarnings("unchecked")
-    private IFeature getFeatureInstanceForName(String featureName) throws Exception{
-		Class<? extends IFeature> klass = (Class<? extends IFeature>) Class.forName(featureName);
-		IFeature theFeature = (IFeature) klass.newInstance();
-		return theFeature;
-	}
-	
-	/**
 	 * Adds scripts for a given feature
 	 * @param engine
 	 * @param feature
diff --git a/wookie-server/src/test/java/org/apache/wookie/tests/HtmlSerializerTest.java b/wookie-server/src/test/java/org/apache/wookie/tests/HtmlSerializerTest.java
deleted file mode 100644
index 77e49aa..0000000
--- a/wookie-server/src/test/java/org/apache/wookie/tests/HtmlSerializerTest.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.wookie.tests;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.io.StringWriter;
-
-import org.apache.wookie.util.html.HtmlSerializer;
-import org.htmlcleaner.CleanerProperties;
-import org.htmlcleaner.HtmlCleaner;
-import org.htmlcleaner.TagNode;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class HtmlSerializerTest {
-	
-	static HtmlCleaner cleaner;
-	static CleanerProperties properties;
-	
-	@BeforeClass
-	public static void setup(){
-		cleaner = new  org.htmlcleaner.HtmlCleaner();
-		// set cleaner properties	
-		properties  = cleaner.getProperties();
-		properties.setOmitDoctypeDeclaration(false);
-		properties.setOmitXmlDeclaration(true);
-		properties.setUseCdataForScriptAndStyle(true);
-		properties.setUseEmptyElementTags(false);	
-	}
-	
-	private String parse(String content){
-		StringWriter writer = new StringWriter();
-		HtmlSerializer ser = new HtmlSerializer(properties);
-		try {
-			TagNode html = cleaner.clean(content);
-			ser.writeXml(html, writer, "UTF-8");
-			return writer.getBuffer().toString();
-		} catch (IOException e) {
-			return null;
-		}
-	}
-	
-	// tests the content of event handlers are not escaped
-	@Test
-	public void eventHandlerAttribute(){
-		String out = parse("<body onload=\"$('#projects').dataTable();\">");
-		assertEquals("<html><head></head><body onload=\"$('#projects').dataTable();\"></body></html>", out);
-	}
-	
-	// tests that other attributes do have content escaped
-	@Test
-	public void otherAttr(){
-		String out = parse("<body class=\"$('#projects').dataTable();\">");
-		assertEquals("<html><head></head><body class=\"$(&apos;#projects&apos;).dataTable();\"></body></html>", out);
-	}
-	
-	// tests that script tags are not encoded
-	@Test
-	public void scriptTag(){
-		String out = parse("<script>$('#projects').dataTable();</script>");
-		assertEquals("<html><head></head><body><script>$('#projects').dataTable();</script></body></html>", out);
-	}
-	
-	// tests that non-script tags are encoded
-	@Test
-	public void sillyTag(){
-		String out = parse("<silly>$('#projects').dataTable();</silly>");
-		assertEquals("<html><head></head><body><silly>$(&apos;#projects&apos;).dataTable();</silly></body></html>", out);
-	}
-	
-	// TODO tests for inline CSS
-
-}