SLING-8654: Fix HTML5 doctype parsing by returning a DocType element instead of a StartTag
diff --git a/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java b/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java
index 541070d..9261c6e 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/parser/TagParser.java
@@ -213,7 +213,7 @@
alist.put(token.image,new AttrValue());
}
jj_consume_token(DECL_END);
- {if (true) return new StartTag(tok.image, alist);}
+ {if (true) return new DocType(tok.image, alist);}
} catch (ParseException ex) {
token_source.SwitchTo(DEFAULT);
String s = getTokenHtmlText(firstToken, getNextToken());
diff --git a/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java b/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
index 29a1cf2..f673837 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
@@ -31,7 +31,7 @@
}
public DocType(String text) {
- this.value = "!DOCTYPE";
+ this.value = "DOCTYPE";
this.attributes = new HashMap<>();
this.attributes.put(text, new AttrValue());
}
diff --git a/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java b/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
index d13c2c0..dc8bfd3 100644
--- a/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
+++ b/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
@@ -93,6 +93,19 @@
case DOCTYPE:
buffer.append("<!");
buffer.append(element.getValue());
+ if (element.hasAttributes()) {
+ buffer.append(' ');
+ buffer.append(element.getAttributes().entrySet().stream().map(entry -> {
+ StringBuilder sb2 = new StringBuilder();
+ sb2.append(entry.getKey());
+ AttrValue value = entry.getValue();
+ if (!value.isEmpty()) {
+ sb2.append("=");
+ sb2.append(value.quoteIfNeeded());
+ }
+ return sb2.toString();
+ }).collect(Collectors.joining(" ")));
+ }
buffer.append(">");
break;
case END_TAG:
diff --git a/src/main/javacc/htmlParser.jj b/src/main/javacc/htmlParser.jj
index f868e2b..47fcde4 100644
--- a/src/main/javacc/htmlParser.jj
+++ b/src/main/javacc/htmlParser.jj
@@ -228,7 +228,7 @@
try {
<DECL_START> tok=<DECL_TAG> (< DECL_ATTR > { alist.put(token.image,new AttrValue()); })* <DECL_END>
{
- return new StartTag(tok.image, alist);
+ return new DocType(tok.image, alist);
}
} catch (ParseException ex) {
token_source.SwitchTo(DEFAULT);
diff --git a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
index 72eb1c1..1092b96 100644
--- a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
+++ b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
@@ -64,13 +64,13 @@
@Test
public void parseDocAndCountStartTags() throws Exception {
long count = stream.filter(elem -> elem.getType() == HtmlElementType.START_TAG).count();
- assertEquals(902, count);
+ assertEquals(901, count);
}
@Test
public void parseDocumentConvertBackToHtml() throws Exception {
String content = stream.collect(HtmlElements.elementsToHtml());
- assertEquals(62062, content.length());
+ assertEquals(62063, content.length());
}
@@ -98,7 +98,7 @@
}, new DefaultHandler2());
stream.forEach(support);
- assertEquals(902, count.get());
+ assertEquals(901, count.get());
}
@Test
@@ -135,7 +135,7 @@
@Test
public void convertLinkAndPrintTest() throws Exception {
- // stream.flatMap(CONVERT_LINKS).map(HtmlStreams.TO_HTML).forEach(System.out::print);
+ stream.flatMap(CONVERT_LINKS).map(HtmlElements.TO_HTML).forEach(System.out::print);
}
@Before