[DOXIA-410] Link checker not handling anchors very well
Submitted by: James Strachan
Patch applied with minor modifications to escape brackets in the anchor regexp, as needed for links to javadoc methods

git-svn-id: https://svn.apache.org/repos/asf/maven/doxia/doxia-tools/trunk@1030039 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java b/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java
new file mode 100644
index 0000000..a3009c1
--- /dev/null
+++ b/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java
@@ -0,0 +1,59 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.maven.doxia.linkcheck.validation;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A helper class to test whether some content contains a given HTML anchor.
+ */
+public class Anchors
+{
+    /**
+     * Returns true if a <code>name</code> or <code>id</code> attribute whose quoted value is exactly
+     * the given anchor can be found in the content markup. The anchor is always matched literally.
+     *
+     * @param content the content string, may be null.
+     * @param anchor the anchor to match, may be null.
+     *
+     * @return true if the given anchor can be found in the content markup,
+     * false if it cannot, or if content is null, or if anchor is null or empty.
+     */
+    public static boolean matchesAnchor( String content, String anchor )
+    {
+        if ( content == null || anchor == null || anchor.length() == 0 )
+        {
+            return false;
+        }
+
+        // Pattern.quote() makes the anchor match literally: javadoc anchors such as
+        // "equals(java.lang.Object)" contain regexp metacharacters, see DOXIA-410
+        String literalAnchor = Pattern.quote( anchor );
+        // can use name or id attributes and also can use single or double quotes with whitespace
+        // around the =; the backreference \2 requires the closing quote to equal the opening one
+        String regex = "(name|id)\\s*=\\s*('|\")" + literalAnchor + "\\2";
+        Matcher matcher = Pattern.compile( regex ).matcher( content );
+        return matcher.find();
+    }
+
+    private Anchors()
+    {
+        // utility class
+    }
+}
diff --git a/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java b/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java
index 4001be3..b050f50 100644
--- a/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java
+++ b/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java
@@ -108,7 +108,7 @@
             {
                 // the anchor exists?
                 String content = read( lvi.getSource(), encoding );
-                if ( content != null && content.indexOf( "name=\"" + anchor + "\"" ) != -1 )
+                if ( Anchors.matchesAnchor( content, anchor ) )
                 {
                     return lvi.getSource();
                 }
@@ -119,7 +119,7 @@
 
             // the anchor exists?
             String content = read( new File( lvi.getSource().getParentFile(), link ), encoding );
-            if ( content != null && content.indexOf( "name=\"" + anchor + "\"" ) != -1 )
+            if ( Anchors.matchesAnchor( content, anchor ) )
             {
                 return new File( lvi.getSource().getParentFile(), link );
             }
diff --git a/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java b/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java
index f0d3a26..361a611 100644
--- a/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java
+++ b/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java
@@ -157,6 +157,14 @@
         this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
 
         String link = lvi.getLink();
+        String anchor = "";
+        int idx = link.indexOf( '#' );
+        if ( idx != -1 )
+        {
+            anchor = link.substring( idx + 1 );
+            link = link.substring( 0, idx );
+        }
+
         try
         {
             if ( link.startsWith( "/" ) )
@@ -200,6 +208,17 @@
 
             if ( hm.getStatusCode() == HttpStatus.SC_OK )
             {
+                // lets check if the anchor is present
+                if ( anchor.length() > 0 )
+                {
+                    String content = hm.getResponseBodyAsString();
+
+                    if ( !Anchors.matchesAnchor( content, anchor ) )
+                    {
+                        return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false,
+                            "Missing anchor '" + anchor + "'" );
+                    }
+                }
                 return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(),
                                                      hm.getStatusText() );
             }
diff --git a/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java b/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java
new file mode 100644
index 0000000..962ed00
--- /dev/null
+++ b/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java
@@ -0,0 +1,98 @@
+package org.apache.maven.doxia.linkcheck;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
+import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
+import org.codehaus.plexus.PlexusTestCase;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * @author Ben Walding
+ * @author <a href="mailto:carlos@apache.org">Carlos Sanchez</a>
+ * @version $Id: LinkCheckTest.java 800044 2009-08-02 12:28:50Z vsiveton $
+ */
+public class AnchorLinkTest
+    extends PlexusTestCase
+{
+    /**
+     * Scans <code>src/test/resources/anchorTest</code> and expects exactly one file,
+     * <code>testAnchor.html</code>, holding one link that is reported as successful.
+     *
+     * @throws Exception if any
+     */
+    public void testScan()
+        throws Exception
+    {
+        LinkCheck checker = (LinkCheck) lookup( LinkCheck.ROLE );
+        assertNotNull( checker );
+
+        File sourceDir = new File( getBasedir(), "src/test/resources/anchorTest" );
+        File report = new File( getBasedir(), "target/linkcheck/anchorTest/linkcheck.xml" );
+        File cache = new File( getBasedir(), "target/linkcheck/anchorTest/linkcheck.cache" );
+
+        checker.setOnline( true ); // TODO: check if online
+        checker.setBasedir( sourceDir ); // TODO
+        checker.setReportOutput( report );
+        checker.setReportOutputEncoding( "UTF-8" );
+        checker.setLinkCheckCache( cache ); // TODO
+        checker.setExcludedLinks( new String[]
+        {
+            "http://cvs.apache.org/viewcvs.cgi/maven-pluginszz/",
+            "http://cvs.apache.org/viewcvs.cgi/mavenzz/"
+        } );
+
+        LinkcheckModel model = checker.execute();
+
+        // index the scanned files by their relative path
+        Map filesByPath = new HashMap();
+        for ( Iterator it = model.getFiles().iterator(); it.hasNext(); )
+        {
+            LinkcheckFile scanned = (LinkcheckFile) it.next();
+            filesByPath.put( scanned.getRelativePath(), scanned );
+        }
+
+        assertEquals( "files.size()", 1, model.getFiles().size() );
+
+        LinkcheckFile anchorFile = check( filesByPath, "testAnchor.html", 1 );
+        assertEquals( "Should have matched!", 1, anchorFile.getSuccessful() );
+        assertEquals( "Should have no failures!", 0, anchorFile.getUnsuccessful() );
+    }
+
+    /**
+     * Looks up a scanned file by relative path and checks how many link results it holds.
+     *
+     * @param map the scanned files, keyed by relative path.
+     * @param name the relative path of the expected file.
+     * @param linkCount the expected number of link results of that file.
+     * @return the scanned file, never null.
+     */
+    private LinkcheckFile check( Map map, String name, int linkCount )
+    {
+        LinkcheckFile found = (LinkcheckFile) map.get( name );
+        assertNotNull( name + " = null!", found );
+        assertEquals( name + ".getResults().size()", linkCount, found.getResults().size() );
+        return found;
+    }
+}
diff --git a/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java b/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java
index b68db7a..d185a72 100644
--- a/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java
+++ b/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java
@@ -73,7 +73,7 @@
             map.put( ftc.getRelativePath(), ftc );
         }
 
-        assertEquals( "files.size()", 9, result.getFiles().size() );
+        assertEquals( "files.size()", 10, result.getFiles().size() );
 
         check( map, "nolink.html", 0 );
         check( map, "test-resources/nolink.html", 0 );
@@ -81,6 +81,7 @@
         check( map, "test-resources/test1/test2.html", 0 );
         check( map, "test1/test1.html", 1 );
         check( map, "testA.html", 3 );
+        check( map, "anchorTest/testAnchor.html", 1 );
         check( map, "linkincomment.html", 1 );
 
         /* test excludes */
diff --git a/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java b/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java
new file mode 100644
index 0000000..e78283d
--- /dev/null
+++ b/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java
@@ -0,0 +1,52 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.maven.doxia.linkcheck.validation;
+
+import junit.framework.TestCase;
+
+public class AnchorsTest extends TestCase
+{
+    /** Checks quoting variants, a near miss, a javadoc method anchor and a complete page. */
+    public void testAnchorMatching()
+    {
+        final String apiAnchor = "assertEqualArrays(java.lang.Object[], java.lang.Object[])";
+        final String page = "<html>\n"
+            + "<body>\n"
+            + "\n"
+            + "<h1 id='foo'>Foo</h1>\n"
+            + "<p>Some text</p>\n"
+            + "\n"
+            + "<h2>Something</h2>\n"
+            + "<p>Lets try using a link: <a href=\"testAnchor.html#foo\">FooLink</a></p>\n"
+            + "\n"
+            + "</body>\n"
+            + "</html>";
+        assertAnchorMatches( "hello  <h1 id='foo'>Foo</h1> there", "foo", true );
+        assertAnchorMatches( "hello  <h1 id = 'foo'>Foo</h1> there", "foo", true );
+        assertAnchorMatches( "hello  <h1 id=\"foo\">Foo</h1> there", "foo", true );
+        assertAnchorMatches( "hello  <h1 id='foo2'>Foo</h1> there", "foo", false );
+        assertAnchorMatches( "hello  <h1 id='" + apiAnchor + "'>Foo</h1> there", apiAnchor, true );
+        assertAnchorMatches( page, "foo", true );
+    }
+    /** Asserts that {@link Anchors#matchesAnchor(String, String)} yields the expected outcome. */
+    protected void assertAnchorMatches( String content, String anchor, boolean expected )
+    {
+        String message = "anchor: " + anchor + " in: " + content;
+        assertEquals( message, expected, Anchors.matchesAnchor( content, anchor ) );
+    }
+}
diff --git a/src/test/resources/anchorTest/testAnchor.html b/src/test/resources/anchorTest/testAnchor.html
new file mode 100644
index 0000000..8ac0fc9
--- /dev/null
+++ b/src/test/resources/anchorTest/testAnchor.html
@@ -0,0 +1,11 @@
+<html>
+<body>
+
+<h1 id='foo'>Foo</h1>
+<p>Some text</p>
+
+<h2>Something</h2>
+<p>Lets try using a link: <a href="testAnchor.html#foo">FooLink</a></p>
+
+</body>
+</html>
\ No newline at end of file