SLING-8566 support processing instruction and xml declaration
diff --git a/pom.xml b/pom.xml
index b886200..0388733 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,7 +28,7 @@
     </parent>
 
     <artifactId>org.apache.sling.commons.html</artifactId>
-    <version>1.1.1-SNAPSHOT</version>
+    <version>1.2.0-SNAPSHOT</version>
 
     <name>Apache Sling Commons HTML Utilities</name>
     <description>
diff --git a/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java b/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java
index 7e36805..541070d 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java
@@ -14,10 +14,13 @@
     for (t=first; t != cur.next; t = t.next) {

       if (t.specialToken != null) {

         Token tt=t.specialToken;

-        while (tt.specialToken != null)

+        while (tt.specialToken != null) {

           tt = tt.specialToken;

-        for (; tt != null; tt = tt.next)

+        }

+        while (tt != null) {

           sb.append(tt.image);

+          tt = tt.next;

+        }

       };

       sb.append(t.image);

     };

diff --git a/src/main/java/org/apache/sling/commons/html/internal/TagstreamHtmlParser.java b/src/main/java/org/apache/sling/commons/html/internal/TagstreamHtmlParser.java
index aeea507..f2f2101 100644
--- a/src/main/java/org/apache/sling/commons/html/internal/TagstreamHtmlParser.java
+++ b/src/main/java/org/apache/sling/commons/html/internal/TagstreamHtmlParser.java
@@ -52,7 +52,10 @@
     @Override

     public Document parse(String systemId, InputStream stream, String encoding) throws IOException {

         final DOMBuilder builder = new DOMBuilder();

-        Html.stream(stream, encoding).forEach(new HtmlSAXSupport(builder, builder));

+        HtmlSAXSupport support = new HtmlSAXSupport(builder, builder); // builder serves as both content and lexical handler

+        support.startDocument(); // accept() no longer opens the document lazily, so open it here

+        Html.stream(stream, encoding).forEach(support);

+        support.endDocument(); // NOTE(review): accept() still calls contentHandler.endDocument() in one switch branch - confirm the document is not ended twice

         return builder.getDocument();

     }

 

diff --git a/src/main/java/org/apache/sling/commons/html/util/HtmlSAXSupport.java b/src/main/java/org/apache/sling/commons/html/util/HtmlSAXSupport.java
index 810a929..32182d2 100644
--- a/src/main/java/org/apache/sling/commons/html/util/HtmlSAXSupport.java
+++ b/src/main/java/org/apache/sling/commons/html/util/HtmlSAXSupport.java
@@ -13,6 +13,7 @@
  */

 package org.apache.sling.commons.html.util;

 

+import java.io.IOException;

 import java.util.Map;

 import java.util.function.Consumer;

 

@@ -25,19 +26,23 @@
 import org.xml.sax.ext.DefaultHandler2;

 import org.xml.sax.ext.LexicalHandler;

 

+/**

+ * Utility class used by the TagstreamHtmlParser to generate SAX events.

+ * Each consumed HtmlElement is translated into ContentHandler and

+ * LexicalHandler callbacks.

+ */

 public class HtmlSAXSupport implements Consumer<HtmlElement> {

-    

+

     private static final DefaultHandler2 handler = new DefaultHandler2();

-    

+

     private ContentHandler contentHandler = handler;

     private LexicalHandler lexicalHandler = handler;

-    private boolean initialized;

 

     public HtmlSAXSupport(ContentHandler ch, final LexicalHandler lh) {

         if (ch != null) {

             contentHandler = ch;

         }

-        if (lh != null ) {

+        if (lh != null) {

             lexicalHandler = lh;

         }

     }

@@ -45,10 +50,6 @@
     @Override

     public void accept(HtmlElement element) {

         try {

-            if (!initialized) {

-                contentHandler.startDocument();

-                initialized = true;

-            }

             String value = element.getValue();

             switch (element.getType()) {

             case COMMENT:

@@ -64,6 +65,12 @@
                 contentHandler.endDocument();

                 break;

             case START_TAG:

+                if (value.startsWith("?")) { // "<?target ...?>": processing instruction or XML declaration

+                    if (!value.equalsIgnoreCase("?xml")) { // the XML declaration is not reported as a processing instruction

+                        contentHandler.processingInstruction(value.substring(1), attrsToString(element.getAttributes())); // SAX target excludes the leading '?'

+                    }

+                    break;

+                }

                 lexicalHandler.startEntity(value);

                 contentHandler.startElement("", value, value, HtmlSAXSupport.convert(element.getAttributes()));

                 break;

@@ -74,17 +81,38 @@
                 break;

             }

         } catch (SAXException se) {

-            //log message

+            // TODO: log; a SAXException raised by the handlers is currently swallowed

         }

 

     }

-    

-    public static Attributes convert(Map<String,AttrValue> attributes) {

+

+    public static Attributes convert(Map<String, AttrValue> attributes) {

         Attributes2Impl response = new Attributes2Impl();

-        attributes.entrySet().forEach(attr ->

-            response.addAttribute("",attr.getKey(), attr.getKey(), "xsi:String", attr.getValue().toString())

-        );

+        attributes.entrySet().forEach(attr -> response.addAttribute("", attr.getKey(), attr.getKey(), "xsi:String",

+                attr.getValue().toString()));

         return response;

     }

 

+    public void startDocument() throws IOException { // forwards the start-of-document event

+        try {

+            contentHandler.startDocument();

+        } catch (SAXException e) {

+            throw new IOException(e); // rethrow as IOException, keeping the SAXException as cause

+        }

+    }

+

+    public void endDocument() throws IOException { // forwards the end-of-document event

+        try {

+            contentHandler.endDocument();

+        } catch (SAXException e) {

+            throw new IOException(e); // rethrow as IOException, keeping the SAXException as cause

+        }

+    }

+    /** Joins the attribute entries (each rendered via toString()) into one space-separated processing-instruction data string. */

+    private String attrsToString(Map<String, AttrValue> attributes) {

+        StringBuilder sb = new StringBuilder(); // stays empty when there are no attributes

+        attributes.entrySet().forEach(attr -> sb.append(sb.length() == 0 ? "" : " ").append(attr.toString()));

+        return sb.toString();

+    }

+

 }

diff --git a/src/main/java/org/apache/sling/commons/html/util/package-info.java b/src/main/java/org/apache/sling/commons/html/util/package-info.java
index 23efc4f..7aeb4ff 100644
--- a/src/main/java/org/apache/sling/commons/html/util/package-info.java
+++ b/src/main/java/org/apache/sling/commons/html/util/package-info.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-@Version("1.1.0")
+@Version("1.2.0")
 package org.apache.sling.commons.html.util;
 
 import org.osgi.annotation.versioning.Version;
diff --git a/src/main/javacc/htmlParser.jj b/src/main/javacc/htmlParser.jj
index 8a74ab7..f868e2b 100644
--- a/src/main/javacc/htmlParser.jj
+++ b/src/main/javacc/htmlParser.jj
@@ -34,10 +34,13 @@
     for (t=first; t != cur.next; t = t.next) {

       if (t.specialToken != null) {

         Token tt=t.specialToken;

-        while (tt.specialToken != null) 

+        while (tt.specialToken != null) {

           tt = tt.specialToken;

-        for (; tt != null; tt = tt.next) 

+        }

+        while (tt != null) { 

           sb.append(tt.image);

+          tt = tt.next;

+        }

       };

       sb.append(t.image);

     };

diff --git a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
index 04b8183..a3579df 100644
--- a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
+++ b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
@@ -18,8 +18,10 @@
 package org.apache.sling.commons.html;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.text.ParseException;
 import java.util.function.Function;
@@ -29,6 +31,7 @@
 import org.apache.sling.commons.html.util.HtmlSAXSupport;
 import org.junit.Before;
 import org.junit.Test;
+import org.w3c.dom.Document;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.ext.DefaultHandler2;
@@ -43,11 +46,11 @@
     private HtmlParser htmlParser;
 
     /*
-     * Japanese (google) translation of 'Don't forget me this weekend!' 
-     * standard text of xml sample note.xml
+     * Japanese (Google) translation of "Don't forget me this weekend!", the
+     * standard text of the XML sample note.xml
      */
-    private static final String MESSAGE ="この週末私を忘れないで!";
-    
+    private static final String MESSAGE = "この週末私を忘れないで!";
+
     @Before
     public void setUp() throws ParseException, Exception {
         InputStream is = this.getClass().getResourceAsStream("/demo.html");
@@ -79,7 +82,7 @@
             @Override
             public void startElement(String uri, String localName, String qName, Attributes attributes)
                     throws SAXException {
-                //System.out.println(localName);
+                // System.out.println(localName);
             }
 
         }, new DefaultHandler2());
@@ -90,7 +93,7 @@
     public void docParseTagTest3() throws Exception {
         long count = stream.flatMap(TagMapper.map((element, process) -> {
             if (element.containsAttribute("href")) {
-                //System.out.println(element.getAttributeValue("href"));
+                // System.out.println(element.getAttributeValue("href"));
                 process.next(element);
             }
         })).count();
@@ -121,10 +124,9 @@
 
     @Test
     public void convertLinkAndPrintTest() throws Exception {
-        //stream.flatMap(CONVERT_LINKS).map(HtmlStreams.TO_HTML).forEach(System.out::print);
+        // stream.flatMap(CONVERT_LINKS).map(HtmlStreams.TO_HTML).forEach(System.out::print);
     }
 
-    
     @Before
     public void setup() {
 
@@ -142,7 +144,13 @@
             }
         });
     }
-    
+
+    @Test
+    public void testDomSupport() throws SAXException, IOException {
+        Document dom = htmlParser.parse("123456", inputStream, "UTF-8"); // first argument is the systemId
+        assertNotEquals(null, dom); // JUnit argument order is (unexpected, actual)
+    }
+
     @Test
     public void testEncodingSupportFailure() throws SAXException {
         htmlParser.parse(inputStream, "ISO8859-1", new DefaultHandler() {