[DOXIA-410] Link checker not handling anchors very well
Submitted by: James Strachan
Patch applied with minor modifications to allow for brackets in regexps, as needed for links to javadoc methods
git-svn-id: https://svn.apache.org/repos/asf/maven/doxia/doxia-tools/trunk@1030039 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java b/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java
new file mode 100644
index 0000000..a3009c1
--- /dev/null
+++ b/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java
@@ -0,0 +1,59 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.maven.doxia.linkcheck.validation;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A helper class to test if some content matches the given HTML anchor
+ */
+public class Anchors
+{
+    /**
+     * Returns true if the given anchor can be found in the content markup, i.e. if the content contains
+     * a <code>name</code> or <code>id</code> attribute (single or double quoted, optional whitespace
+     * around the <code>=</code>) whose value is exactly the given anchor. The anchor is always matched
+     * as literal text, never as a regular expression.
+     *
+     * @param content the content string, may be null.
+     * @param anchor the anchor to match, may be null.
+     *
+     * @return true if the given anchor can be found in the content markup,
+     *         false otherwise or if content or anchor is null or empty.
+     */
+    public static boolean matchesAnchor( String content, String anchor )
+    {
+        if ( content == null || anchor == null || anchor.length() == 0 )
+        {
+            return false;
+        }
+
+        // Quote the whole anchor instead of escaping selected characters: javadoc method anchors
+        // (see DOXIA-410) contain brackets and dots, and every regex metacharacter must match literally.
+        String regex = "(name|id)\\s*=\\s*('|\")" + Pattern.quote( anchor ) + "('|\")";
+        Matcher matcher = Pattern.compile( regex ).matcher( content );
+
+        return matcher.find();
+    }
+
+    private Anchors()
+    {
+        // utility class
+    }
+}
diff --git a/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java b/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java
index 4001be3..b050f50 100644
--- a/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java
+++ b/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java
@@ -108,7 +108,7 @@
{
// the anchor exists?
String content = read( lvi.getSource(), encoding );
- if ( content != null && content.indexOf( "name=\"" + anchor + "\"" ) != -1 )
+ if ( Anchors.matchesAnchor( content, anchor ) )
{
return lvi.getSource();
}
@@ -119,7 +119,7 @@
// the anchor exists?
String content = read( new File( lvi.getSource().getParentFile(), link ), encoding );
- if ( content != null && content.indexOf( "name=\"" + anchor + "\"" ) != -1 )
+ if ( Anchors.matchesAnchor( content, anchor ) )
{
return new File( lvi.getSource().getParentFile(), link );
}
diff --git a/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java b/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java
index f0d3a26..361a611 100644
--- a/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java
+++ b/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java
@@ -157,6 +157,14 @@
this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
String link = lvi.getLink();
+ String anchor = "";
+ int idx = link.indexOf( '#' );
+ if ( idx != -1 )
+ {
+ anchor = link.substring( idx + 1 );
+ link = link.substring( 0, idx );
+ }
+
try
{
if ( link.startsWith( "/" ) )
@@ -200,6 +208,17 @@
if ( hm.getStatusCode() == HttpStatus.SC_OK )
{
+ // let's check if the anchor is present
+ if ( anchor.length() > 0 )
+ {
+ String content = hm.getResponseBodyAsString();
+
+ if ( !Anchors.matchesAnchor( content, anchor ) )
+ {
+ return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false,
+ "Missing anchor '" + anchor + "'" );
+ }
+ }
return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(),
hm.getStatusText() );
}
diff --git a/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java b/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java
new file mode 100644
index 0000000..962ed00
--- /dev/null
+++ b/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java
@@ -0,0 +1,98 @@
+package org.apache.maven.doxia.linkcheck;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
+import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
+import org.codehaus.plexus.PlexusTestCase;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * @author Ben Walding
+ * @author <a href="mailto:carlos@apache.org">Carlos Sanchez</a>
+ * @version $Id: LinkCheckTest.java 800044 2009-08-02 12:28:50Z vsiveton $
+ */
+public class AnchorLinkTest
+    extends PlexusTestCase
+{
+    /**
+     * Runs the link check over the <code>anchorTest</code> resources and expects the single anchor link
+     * found in <code>testAnchor.html</code> to be reported as successful.
+     *
+     * @throws Exception if any.
+     */
+    public void testScan()
+        throws Exception
+    {
+        LinkCheck linkCheck = (LinkCheck) lookup( LinkCheck.ROLE );
+        assertNotNull( linkCheck );
+
+        linkCheck.setOnline( true ); // TODO: check if online
+        linkCheck.setBasedir( new File( getBasedir(), "src/test/resources/anchorTest" ) ); // TODO
+        linkCheck.setReportOutput( new File( getBasedir(), "target/linkcheck/anchorTest/linkcheck.xml" ) );
+        linkCheck.setReportOutputEncoding( "UTF-8" );
+        linkCheck.setLinkCheckCache( new File( getBasedir(), "target/linkcheck/anchorTest/linkcheck.cache" ) ); // TODO
+        linkCheck.setExcludedLinks( new String[]
+        {
+            "http://cvs.apache.org/viewcvs.cgi/maven-pluginszz/",
+            "http://cvs.apache.org/viewcvs.cgi/mavenzz/"
+        } );
+
+        LinkcheckModel model = linkCheck.execute();
+
+        // index the checked files by their relative path
+        Map filesByPath = new HashMap();
+        for ( Iterator it = model.getFiles().iterator(); it.hasNext(); )
+        {
+            LinkcheckFile checkedFile = (LinkcheckFile) it.next();
+            filesByPath.put( checkedFile.getRelativePath(), checkedFile );
+        }
+
+        assertEquals( "files.size()", 1, model.getFiles().size() );
+
+        LinkcheckFile anchorFile = check( filesByPath, "testAnchor.html", 1 );
+
+        // the only link of the page must have been validated successfully
+        assertEquals( "Should have matched!", 1, anchorFile.getSuccessful() );
+        assertEquals( "Should have no failures!", 0, anchorFile.getUnsuccessful() );
+    }
+
+    /**
+     * Looks up the checked file registered under the given name and verifies its number of results.
+     *
+     * @param map the checked files keyed by relative path.
+     * @param name the relative path of the file to look up.
+     * @param linkCount the expected number of link results of the file.
+     * @return the checked file, never null.
+     */
+    private LinkcheckFile check( Map map, String name, int linkCount )
+    {
+        LinkcheckFile checked = (LinkcheckFile) map.get( name );
+
+        assertNotNull( name + " = null!", checked );
+
+        assertEquals( name + ".getResults().size()", linkCount, checked.getResults().size() );
+        return checked;
+    }
+}
diff --git a/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java b/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java
index b68db7a..d185a72 100644
--- a/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java
+++ b/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java
@@ -73,7 +73,7 @@
map.put( ftc.getRelativePath(), ftc );
}
- assertEquals( "files.size()", 9, result.getFiles().size() );
+ assertEquals( "files.size()", 10, result.getFiles().size() );
check( map, "nolink.html", 0 );
check( map, "test-resources/nolink.html", 0 );
@@ -81,6 +81,7 @@
check( map, "test-resources/test1/test2.html", 0 );
check( map, "test1/test1.html", 1 );
check( map, "testA.html", 3 );
+ check( map, "anchorTest/testAnchor.html", 1 );
check( map, "linkincomment.html", 1 );
/* test excludes */
diff --git a/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java b/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java
new file mode 100644
index 0000000..e78283d
--- /dev/null
+++ b/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java
@@ -0,0 +1,52 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.maven.doxia.linkcheck.validation;
+
+import junit.framework.TestCase;
+
+public class AnchorsTest extends TestCase
+{
+    /** Checks quoting and spacing variants of the id attribute, a near miss, an API anchor and a full page. */
+    public void testAnchorMatching()
+    {
+        final String apiAnchor = "assertEqualArrays(java.lang.Object[], java.lang.Object[])";
+        final String page = "<html>\n"
+            + "<body>\n"
+            + "\n"
+            + "<h1 id='foo'>Foo</h1>\n"
+            + "<p>Some text</p>\n"
+            + "\n"
+            + "<h2>Something</h2>\n"
+            + "<p>Lets try using a link: <a href=\"testAnchor.html#foo\">FooLink</a></p>\n"
+            + "\n"
+            + "</body>\n"
+            + "</html>";
+        assertAnchorMatches( "hello <h1 id='foo'>Foo</h1> there", "foo", true );
+        assertAnchorMatches( "hello <h1 id = 'foo'>Foo</h1> there", "foo", true );
+        assertAnchorMatches( "hello <h1 id=\"foo\">Foo</h1> there", "foo", true );
+        assertAnchorMatches( "hello <h1 id='foo2'>Foo</h1> there", "foo", false );
+        assertAnchorMatches( "hello <h1 id='" + apiAnchor + "'>Foo</h1> there", apiAnchor, true );
+        assertAnchorMatches( page, "foo", true );
+    }
+
+    /** Asserts that looking up the anchor in the content yields the expected outcome. */
+    protected void assertAnchorMatches( String content, String anchor, boolean expected )
+    {
+        assertEquals( "anchor: " + anchor + " in: " + content, expected, Anchors.matchesAnchor( content, anchor ) );
+    }
+}
diff --git a/src/test/resources/anchorTest/testAnchor.html b/src/test/resources/anchorTest/testAnchor.html
new file mode 100644
index 0000000..8ac0fc9
--- /dev/null
+++ b/src/test/resources/anchorTest/testAnchor.html
@@ -0,0 +1,11 @@
+<html>
+<body>
+
+<h1 id='foo'>Foo</h1>
+<p>Some text</p>
+
+<h2>Something</h2>
+<p>Lets try using a link: <a href="testAnchor.html#foo">FooLink</a></p>
+
+</body>
+</html>
\ No newline at end of file