Ignore any leading HTML comment blocks when identifying the first header
git-svn-id: https://svn.apache.org/repos/asf/maven/doxia/doxia/trunk@1541442 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
index 22f94fe..dcfb507 100644
--- a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
+++ b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
@@ -30,6 +30,7 @@
import org.pegdown.Extensions;
import org.pegdown.PegDownProcessor;
import org.pegdown.ast.HeaderNode;
+import org.pegdown.ast.HtmlBlockNode;
import org.pegdown.ast.Node;
import org.pegdown.ast.RootNode;
import org.pegdown.ast.SuperNode;
@@ -164,8 +165,14 @@
RootNode rootNode = PEGDOWN_PROCESSOR.parseMarkdown( text.toCharArray() );
if ( !haveTitle && rootNode.getChildren().size() > 0 )
{
- // use the first node only if it is a heading
- final Node firstNode = rootNode.getChildren().get( 0 );
+ // use the first (non-comment) node only if it is a heading
+ int i = 0;
+ Node firstNode = null;
+ while ( i < rootNode.getChildren().size() && isHtmlComment(
+ ( firstNode = rootNode.getChildren().get( i ) ) ) )
+ {
+ i++;
+ }
if ( firstNode instanceof HeaderNode )
{
html.append( "<title>" );
@@ -186,6 +193,14 @@
}
}
+    /** Tells whether {@code node} is an {@link HtmlBlockNode} whose text starts with {@code <!--}. */
+    public static boolean isHtmlComment( Node node )
+    {
+        // any node that is not an HTML block (including null) is never treated as a comment
+        return node instanceof HtmlBlockNode
+            && ( (HtmlBlockNode) node ).getText().startsWith( "<!--" );
+    }
+
public static String nodeText( Node node )
{
StringBuilder builder = new StringBuilder();
diff --git a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
index cc39888..4b1627e 100644
--- a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
+++ b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
@@ -221,6 +221,7 @@
{
Iterator<SinkEventElement> it = parseFileToEventTestingSink( "first-heading" ).getEventList().iterator();
+ // NOTE: H1 is rendered as "unknown" and H2 is "section1"
assertEquals( it, "head", "title", "text", "title_", "head_", "body", "section1", "sectionTitle1", "text",
"sectionTitle1_", "paragraph", "text", "paragraph_", "section1_", "body_" );
@@ -228,6 +229,23 @@
}
/**
+ * Assert the first header is passed as the title event when parsing "comment-before-heading.md".
+ *
+ * @throws Exception if the event list is not correct when parsing the document.
+ */
+    public void testCommentBeforeHeadingSinkEvent()
+        throws Exception
+    {
+        // The license comment comes first in the file, yet the H1 after it must still supply the title.
+        final Iterator<SinkEventElement> events =
+            parseFileToEventTestingSink( "comment-before-heading" ).getEventList().iterator();
+        // The H1 itself shows up as a pair of "unknown" events rather than as a section.
+        assertEquals( events, "head", "title", "text", "title_", "head_", "body", "comment", "unknown", "text",
+                      "unknown", "paragraph", "text", "paragraph_", "body_" );
+        assertFalse( events.hasNext() );
+    }
+
+ /**
* Parse the file and return a {@link SinkEventTestingSink}.
*
* @param file the file to parse with {@link #parser}.
diff --git a/doxia-modules/doxia-module-markdown/src/test/resources/comment-before-heading.md b/doxia-modules/doxia-module-markdown/src/test/resources/comment-before-heading.md
new file mode 100644
index 0000000..949ee15
--- /dev/null
+++ b/doxia-modules/doxia-module-markdown/src/test/resources/comment-before-heading.md
@@ -0,0 +1,25 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+Test document
+=============
+
+This document verifies that an initial HTML comment block does not prevent the first heading
+from being used as the document title. Currently the H1 headings are not rendered as sections by
+Doxia.
\ No newline at end of file