// blob: b9c4bfeb7c997edf98f72a3b0eb17de56803d121 [file] [log] [blame]
package org.apache.maven.doxia.linkcheck.validation;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.net.URL;
import java.util.Map;
import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NTCredentials;
import org.apache.commons.httpclient.StatusLine;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.doxia.linkcheck.HttpBean;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.codehaus.plexus.util.StringUtils;
/**
 * Checks links which are normal URLs by actually requesting them over HTTP.
 * <p>
 * Each call to {@link #validateLink(LinkValidationItem)} issues a request (HEAD or GET, depending on
 * the configured {@link HttpBean}) through a commons-httpclient {@link HttpClient} and maps the
 * resulting status code to a {@link LinkValidationResult}. Redirects are followed manually by
 * {@code checkLink}, up to a configurable maximum.
 * <p>
 * Note that {@code validateLink} temporarily sets JVM-wide system properties (the configured
 * HttpClient parameters and the user agent) and removes them again when it returns.
 *
 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 */
public final class OnlineHTTPLinkValidator
    extends HTTPLinkValidator
{
    /** Log for debug output. */
    private static final Log LOG = LogFactory.getLog( OnlineHTTPLinkValidator.class );

    /** The maximum number of redirections for a link. */
    private static final int MAX_NB_REDIRECT = 10;

    /** Use the get method to test pages. */
    private static final String GET_METHOD = "get";

    /** Use the head method to test pages. */
    private static final String HEAD_METHOD = "head";

    /** User agent sent with every request; some web servers reject the HttpClient default. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";

    /** The http bean encapsuling all http parameters supported. */
    private HttpBean http;

    /** The base URL for links that start with '/'. */
    private String baseURL;

    /** The HttpClient. */
    private transient HttpClient cl;

    /**
     * Constructor: initialize settings from a default {@link HttpBean}.
     */
    public OnlineHTTPLinkValidator()
    {
        this( new HttpBean() );
    }

    /**
     * Constructor: initialize settings.
     *
     * @param bean The http bean encapsuling all HTTP parameters supported. If {@code null}, a default
     *            {@link HttpBean} is used instead.
     */
    public OnlineHTTPLinkValidator( HttpBean bean )
    {
        if ( bean == null )
        {
            bean = new HttpBean();
        }

        if ( LOG.isDebugEnabled() )
        {
            LOG.debug( "Will use method : [" + bean.getMethod() + "]" );
        }

        this.http = bean;

        initHttpClient();
    }

    /**
     * The base URL.
     *
     * @return the base URL.
     */
    public String getBaseURL()
    {
        return this.baseURL;
    }

    /**
     * Sets the base URL. This is pre-pended to links that start with '/'.
     *
     * @param url the base URL.
     */
    public void setBaseURL( String url )
    {
        this.baseURL = url;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The fragment part of the link (everything after the first {@code '#'}) is stripped before the
     * request is made; if the request succeeds with 200 and a fragment was present, the response body
     * is searched for a matching anchor. Links starting with {@code '/'} are prefixed with the base URL
     * and yield a warning result when no base URL has been set. A redirect status (301, 302, 307)
     * yields a warning result; any other non-200 status or any thrown exception yields an error result.
     */
    @Override
    public LinkValidationResult validateLink( LinkValidationItem lvi )
    {
        if ( this.cl == null )
        {
            initHttpClient();
        }

        // Expose the configured HttpClient parameters as system properties for the duration of the
        // check; they are removed again in the finally block below.
        if ( this.http.getHttpClientParameters() != null )
        {
            for ( Map.Entry<Object, Object> entry : this.http.getHttpClientParameters().entrySet() )
            {
                if ( entry.getValue() != null )
                {
                    System.setProperty( entry.getKey().toString(), entry.getValue().toString() );
                }
            }
        }

        // Some web servers don't allow the default user-agent sent by httpClient
        System.setProperty( HttpMethodParams.USER_AGENT, USER_AGENT );
        this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, USER_AGENT );

        // Split "page#anchor" into the requestable link and the anchor to look for in the body.
        String link = lvi.getLink();
        String anchor = "";
        int idx = link.indexOf( '#' );
        if ( idx != -1 )
        {
            anchor = link.substring( idx + 1 );
            link = link.substring( 0, idx );
        }

        try
        {
            if ( link.startsWith( "/" ) )
            {
                if ( getBaseURL() == null )
                {
                    if ( LOG.isWarnEnabled() )
                    {
                        LOG.warn( "Cannot check link [" + link + "] in page [" + lvi.getSource()
                            + "], as no base URL has been set!" );
                    }

                    return new LinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, false,
                                                     "No base URL specified" );
                }

                link = getBaseURL() + link;
            }

            HttpMethod hm = null;
            try
            {
                hm = checkLink( link, 0 );
            }
            catch ( Throwable t )
            {
                if ( LOG.isDebugEnabled() )
                {
                    LOG.debug( "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]", t );
                }

                return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getClass().getName()
                    + " : " + t.getMessage() );
            }

            if ( hm == null )
            {
                return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false,
                                                 "Cannot retreive HTTP Status" );
            }

            int statusCode = hm.getStatusCode();

            if ( statusCode == HttpStatus.SC_OK )
            {
                // lets check if the anchor is present
                if ( anchor.length() > 0 )
                {
                    // NOTE(review): checkLink has already released the connection at this point;
                    // confirm the response body is still available here (and for HEAD requests).
                    String content = hm.getResponseBodyAsString();

                    if ( !Anchors.matchesAnchor( content, anchor ) )
                    {
                        return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false,
                                                             "Missing anchor '" + anchor + "'" );
                    }
                }

                return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, statusCode,
                                                     hm.getStatusText() );
            }

            String msg = "Received: [" + statusCode + "] for [" + link + "] in page [" + lvi.getSource() + "]";

            // If there's a redirection ... add a warning
            if ( isRedirect( statusCode ) )
            {
                LOG.warn( msg );

                return new HTTPLinkValidationResult( LinkcheckFileResult.WARNING_LEVEL, true, statusCode,
                                                     hm.getStatusText() );
            }

            LOG.debug( msg );

            return new HTTPLinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, statusCode,
                                                 hm.getStatusText() );
        }
        catch ( Throwable t )
        {
            String msg = "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]";

            if ( LOG.isDebugEnabled() )
            {
                LOG.debug( msg, t );
            }
            else
            {
                LOG.error( msg );
            }

            return new LinkValidationResult( LinkcheckFileResult.ERROR_LEVEL, false, t.getMessage() );
        }
        finally
        {
            // Undo the system properties set at the top of this method.
            System.getProperties().remove( HttpMethodParams.USER_AGENT );

            if ( this.http.getHttpClientParameters() != null )
            {
                for ( Map.Entry<Object, Object> entry : this.http.getHttpClientParameters().entrySet() )
                {
                    if ( entry.getValue() != null )
                    {
                        System.getProperties().remove( entry.getKey().toString() );
                    }
                }
            }
        }
    }

    /**
     * Initialize the HttpClient: connection manager, timeouts, circular-redirect allowance and, when a
     * proxy host is configured, the proxy and its (basic or NTLM) credentials.
     */
    private void initHttpClient()
    {
        LOG.debug( "A new HttpClient instance is needed ..." );

        this.cl = new HttpClient( new MultiThreadedHttpConnectionManager() );

        // Default params: a timeout of 0 means "leave the HttpClient defaults alone".
        if ( this.http.getTimeout() != 0 )
        {
            this.cl.getHttpConnectionManager().getParams().setConnectionTimeout( this.http.getTimeout() );
            this.cl.getHttpConnectionManager().getParams().setSoTimeout( this.http.getTimeout() );
        }
        this.cl.getParams().setBooleanParameter( HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true );

        HostConfiguration hc = new HostConfiguration();
        HttpState state = new HttpState();

        if ( StringUtils.isNotEmpty( this.http.getProxyHost() ) )
        {
            hc.setProxy( this.http.getProxyHost(), this.http.getProxyPort() );

            if ( LOG.isDebugEnabled() )
            {
                LOG.debug( "Proxy Host:" + this.http.getProxyHost() );
                LOG.debug( "Proxy Port:" + this.http.getProxyPort() );
            }

            if ( StringUtils.isNotEmpty( this.http.getProxyUser() ) && this.http.getProxyPassword() != null )
            {
                if ( LOG.isDebugEnabled() )
                {
                    LOG.debug( "Proxy User:" + this.http.getProxyUser() );
                }

                // NTLM credentials are used as soon as an NTLM host is configured.
                Credentials credentials;
                if ( StringUtils.isNotEmpty( this.http.getProxyNtlmHost() ) )
                {
                    credentials =
                        new NTCredentials( this.http.getProxyUser(), this.http.getProxyPassword(),
                                           this.http.getProxyNtlmHost(), this.http.getProxyNtlmDomain() );
                }
                else
                {
                    credentials =
                        new UsernamePasswordCredentials( this.http.getProxyUser(), this.http.getProxyPassword() );
                }

                state.setProxyCredentials( AuthScope.ANY, credentials );
            }
        }
        else
        {
            LOG.debug( "Not using a proxy" );
        }

        this.cl.setHostConfiguration( hc );
        this.cl.setState( state );

        LOG.debug( "New HttpClient instance created." );
    }

    /**
     * Checks the given link, following 301/302/307 redirects recursively.
     * <p>
     * The connection of every executed method is released before this method returns.
     *
     * @param link the link to check.
     * @param nbRedirect the number of current redirects.
     * @return the executed method carrying the status to report, or {@code null} if no status line
     *         could be obtained for the link or for the target of a redirect. For the initial call
     *         ({@code nbRedirect == 0}) a redirect whose target answers 200 returns the <em>original</em>
     *         redirect response so that the redirection itself can be reported.
     * @throws IOException if something goes wrong, including when the maximum number of redirections
     *             is exceeded or a redirect location cannot be resolved.
     */
    private HttpMethod checkLink( String link, int nbRedirect )
        throws IOException
    {
        int max = getMaxRedirects();
        if ( nbRedirect > max )
        {
            throw new HttpException( "Maximum number of redirections (" + max + ") exceeded" );
        }

        HttpMethod hm = createMethod( link );

        // Default
        hm.setFollowRedirects( this.http.isFollowRedirects() );

        try
        {
            URL url = new URL( link );

            cl.getHostConfiguration().setHost( url.getHost(), url.getPort(), url.getProtocol() );

            cl.executeMethod( hm );

            StatusLine sl = hm.getStatusLine();
            if ( sl == null )
            {
                if ( LOG.isErrorEnabled() )
                {
                    LOG.error( "Unknown error validating link : " + link );
                }

                return null;
            }

            if ( !isRedirect( hm.getStatusCode() ) )
            {
                return hm;
            }

            Header locationHeader = hm.getResponseHeader( "location" );
            if ( locationHeader == null )
            {
                LOG.error( "Site sent redirect, but did not set Location header" );

                return hm;
            }

            String newLink = resolveRedirectTarget( link, locationHeader.getValue() );

            if ( LOG.isDebugEnabled() )
            {
                LOG.debug( "[" + link + "] is redirected to [" + newLink + "]" );
            }

            // Free the connection before recursing so the redirect chain does not pile up connections.
            hm.releaseConnection();

            HttpMethod target = checkLink( newLink, nbRedirect + 1 );
            if ( target == null )
            {
                // No status line for the redirect target: propagate "unknown" instead of failing with
                // a NullPointerException on the status check below.
                return null;
            }

            // Restore the hm to "Moved permanently" | "Moved temporarily" | "Temporary redirect"
            // if the new location is found to allow us to report it
            if ( target.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0 )
            {
                return hm;
            }

            return target;
        }
        finally
        {
            // Releasing twice (redirect case) is harmless; this covers every other exit path.
            hm.releaseConnection();
        }
    }

    /**
     * Determines the maximum number of redirections to follow: the configured
     * {@link HttpClientParams#MAX_REDIRECTS} HttpClient parameter if present and numeric, otherwise
     * {@link #MAX_NB_REDIRECT}.
     *
     * @return the maximum number of redirections.
     */
    private int getMaxRedirects()
    {
        if ( this.http.getHttpClientParameters() == null )
        {
            return MAX_NB_REDIRECT;
        }

        Object configured = this.http.getHttpClientParameters().get( HttpClientParams.MAX_REDIRECTS );
        if ( configured == null )
        {
            return MAX_NB_REDIRECT;
        }

        try
        {
            return Integer.parseInt( configured.toString() );
        }
        catch ( NumberFormatException e )
        {
            if ( LOG.isWarnEnabled() )
            {
                LOG.warn( "HttpClient parameter '" + HttpClientParams.MAX_REDIRECTS
                    + "' is not a number. Ignoring!" );
            }

            return MAX_NB_REDIRECT;
        }
    }

    /**
     * Creates the HTTP method configured in the {@link HttpBean} for the given link. Anything other
     * than "head" or "get" (case-insensitive) is logged as an error and falls back to GET.
     *
     * @param link the link to request.
     * @return a new, not yet executed method.
     */
    private HttpMethod createMethod( String link )
    {
        String method = this.http.getMethod();

        if ( HEAD_METHOD.equalsIgnoreCase( method ) )
        {
            return new HeadMethod( link );
        }

        if ( !GET_METHOD.equalsIgnoreCase( method ) && LOG.isErrorEnabled() )
        {
            LOG.error( "Unsupported method: " + method + ", using 'get'." );
        }

        return new GetMethod( link );
    }

    /**
     * Turns the value of a {@code Location} header into an absolute link.
     * <p>
     * Values that already start with {@code http://} or {@code https://} are returned unchanged. Any
     * other value is resolved as a reference relative to the link that produced the redirect, so that
     * {@code http://host/a/b.html} + {@code c.html} gives {@code http://host/a/c.html} rather than the
     * two strings glued together.
     *
     * @param link the absolute link that answered with a redirect.
     * @param location the raw value of the {@code Location} header.
     * @return the absolute link to request next.
     * @throws IOException if {@code link} or the resolved location is not a valid URL.
     */
    private static String resolveRedirectTarget( String link, String location )
        throws IOException
    {
        if ( location.startsWith( "http://" ) || location.startsWith( "https://" ) )
        {
            return location;
        }

        return new URL( new URL( link ), location ).toExternalForm();
    }

    /**
     * Tells whether the given status code is one of the redirections handled by this validator.
     *
     * @param statusCode an HTTP status code.
     * @return {@code true} for 301, 302 and 307.
     */
    private static boolean isRedirect( int statusCode )
    {
        return statusCode == HttpStatus.SC_MOVED_PERMANENTLY
            || statusCode == HttpStatus.SC_MOVED_TEMPORARILY
            || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT;
    }
}