blob: acea4aac8a20ac62eea4a9f3e41089874cda995c [file] [log] [blame]
package org.apache.maven.doxia.linkcheck;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer;
import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem;
import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult;
import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager;
import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator;
import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator;
import org.codehaus.plexus.component.annotations.Component;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.ReaderFactory;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.WriterFactory;
/**
 * The main bean to be called whenever a set of documents should have their links checked.
 * <p>
 * Configure the instance through its setters, then call {@link #execute()} to scan the base
 * directory for HTML pages, validate every link found in them and (optionally) write an XML report.
 *
 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 * @author <a href="mailto:carlos@apache.org">Carlos Sanchez</a>
 * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 */
@Component( role = LinkCheck.class )
public final class DefaultLinkCheck
    implements LinkCheck
{
    /** Log. */
    private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class );

    /** One MegaByte. */
    private static final long MEG = 1024 * 1024;

    /** The basedir to check. */
    private File basedir;

    /** Linkcheck Cache. */
    private File linkCheckCache;

    /**
     * To exclude some links. Could contain a link, i.e. <code>http:&#47;&#47;maven.apache.org</code>,
     * or pattern links i.e. <code>http:&#47;&#47;maven.apache.org&#47;**&#47;*.html</code>
     */
    private String[] excludedLinks = null;

    /** To exclude some pages. */
    private String[] excludedPages = null;

    /**
     * Excluded http errors only in online mode.
     */
    private int[] excludedHttpStatusErrors = null;

    /**
     * Excluded http warnings only in online mode.
     */
    private int[] excludedHttpStatusWarnings = null;

    /** Online mode. */
    private boolean online;

    /** Bean encapsulating some http parameters. */
    private HttpBean http;

    /** Internal LinkValidatorManager, created lazily by {@link #getLinkValidatorManager()}. */
    private LinkValidatorManager lvm = null;

    /** Report output file for xml document. */
    private File reportOutput;

    /** Report output encoding for the xml document, UTF-8 by default. */
    private String reportOutputEncoding = "UTF-8";

    /** The base URL for links that start with '/'. */
    private String baseURL;

    /** The encoding used to process files, UTF-8 by default. */
    private String encoding = ReaderFactory.UTF_8;

    // ----------------------------------------------------------------------
    // Public methods
    // ----------------------------------------------------------------------

    /** {@inheritDoc} */
    @Override
    public void setBasedir( File base )
    {
        this.basedir = base;
    }

    /** {@inheritDoc} */
    @Override
    public void setBaseURL( String url )
    {
        this.baseURL = url;
    }

    /** {@inheritDoc} */
    @Override
    public void setExcludedHttpStatusErrors( int[] excl )
    {
        this.excludedHttpStatusErrors = excl;
    }

    /** {@inheritDoc} */
    @Override
    public void setExcludedHttpStatusWarnings( int[] excl )
    {
        this.excludedHttpStatusWarnings = excl;
    }

    /** {@inheritDoc} */
    @Override
    public void setExcludedLinks( String[] excl )
    {
        this.excludedLinks = excl;
    }

    /** {@inheritDoc} */
    @Override
    public void setExcludedPages( String[] excl )
    {
        this.excludedPages = excl;
    }

    /** {@inheritDoc} */
    @Override
    public void setHttp( HttpBean http )
    {
        this.http = http;
    }

    /** {@inheritDoc} */
    @Override
    public void setLinkCheckCache( File cacheFile )
    {
        this.linkCheckCache = cacheFile;
    }

    /** {@inheritDoc} */
    @Override
    public void setOnline( boolean onLine )
    {
        this.online = onLine;
    }

    /** {@inheritDoc} */
    @Override
    public void setReportOutput( File file )
    {
        this.reportOutput = file;
    }

    /** {@inheritDoc} */
    @Override
    public void setReportOutputEncoding( String encoding )
    {
        this.reportOutputEncoding = encoding;
    }

    /** {@inheritDoc} */
    @Override
    public LinkcheckModel execute()
        throws LinkCheckException
    {
        if ( this.basedir == null )
        {
            LOG.error( "No base directory specified!" );

            throw new NullPointerException( "The basedir can't be null!" );
        }

        // A missing report file is not fatal: the model is still built and returned.
        if ( this.reportOutput == null )
        {
            LOG.warn( "No output file specified! Results will not be written!" );
        }

        LinkcheckModel model = new LinkcheckModel();
        model.setModelEncoding( reportOutputEncoding );
        model.setFiles( new LinkedList<LinkcheckFile>() );

        displayMemoryConsumption();

        LinkValidatorManager validator = getLinkValidatorManager();
        try
        {
            validator.loadCache( this.linkCheckCache );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not load cache: " + e.getMessage(), e );
        }

        displayMemoryConsumption();

        LOG.info( "Begin to check links in files..." );

        try
        {
            findAndCheckFiles( this.basedir, model );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not scan base directory: " + basedir.getAbsolutePath(), e );
        }

        LOG.info( "Links checked." );

        displayMemoryConsumption();

        try
        {
            createDocument( model );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not write the linkcheck document: " + e.getMessage(), e );
        }

        try
        {
            validator.saveCache( this.linkCheckCache );
        }
        catch ( IOException e )
        {
            throw new LinkCheckException( "Could not save cache: " + e.getMessage(), e );
        }

        displayMemoryConsumption();

        return model;
    }

    /** {@inheritDoc} */
    @Override
    public void setEncoding( String encoding )
    {
        if ( encoding == null || encoding.isEmpty() )
        {
            throw new IllegalArgumentException( "encoding is required" );
        }

        try
        {
            // Only used to verify that the JVM knows this charset; the name itself is what gets stored.
            Charset.forName( encoding );
        }
        catch ( UnsupportedCharsetException e )
        {
            // Keep the original exception as cause so the failure stays diagnosable.
            throw new IllegalArgumentException( "encoding '" + encoding + "' is unsupported", e );
        }

        this.encoding = encoding;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Whether links are checked in online mode.
     *
     * @return online
     */
    private boolean isOnline()
    {
        return this.online;
    }

    /**
     * Returns the excluded links.
     * Could contain a link, i.e. <code>http:&#47;&#47;maven.apache.org/</code>,
     * or pattern links i.e. <code>http:&#47;&#47;maven.apache.org&#47;**&#47;*.html</code>
     *
     * @return the excluded links, or <code>null</code> if none were set
     */
    private String[] getExcludedLinks()
    {
        return this.excludedLinks;
    }

    /**
     * Gets the comma separated list of effective exclude patterns, i.e. the default excludes
     * of {@link FileUtils} followed by the configured excluded pages.
     *
     * @return The comma separated list of effective exclude patterns, never <code>null</code>.
     */
    private String getExcludedPages()
    {
        LinkedList<String> patternList = new LinkedList<String>( FileUtils.getDefaultExcludesAsList() );

        if ( excludedPages != null )
        {
            patternList.addAll( Arrays.asList( excludedPages ) );
        }

        return StringUtils.join( patternList.iterator(), "," );
    }

    /**
     * Gets the comma separated list of effective include patterns.
     *
     * @return the comma separated list of effective include patterns, never <code>null</code>
     */
    private String getIncludedPages()
    {
        return "**/*.html,**/*.htm";
    }

    /**
     * Returns the excluded HTTP errors, i.e. <code>404</code>.
     *
     * @return the excluded HTTP status codes, or <code>null</code> if none were set
     */
    private int[] getExcludedHttpStatusErrors()
    {
        return this.excludedHttpStatusErrors;
    }

    /**
     * Returns the excluded HTTP warnings, i.e. <code>301</code>.
     *
     * @return the excluded HTTP status codes, or <code>null</code> if none were set
     */
    private int[] getExcludedHttpStatusWarnings()
    {
        return this.excludedHttpStatusWarnings;
    }

    /**
     * Returns the LinkValidatorManager, creating a default one on first use.
     *
     * @return the LinkValidatorManager, never <code>null</code>
     */
    private LinkValidatorManager getLinkValidatorManager()
    {
        if ( this.lvm == null )
        {
            initDefaultLinkValidatorManager();
        }

        return this.lvm;
    }

    /**
     * Initializes the current LinkValidatorManager to a default value, built from the
     * configuration (excluded links, encoding, online mode, http bean, base URL) as it is
     * at the time of the call.
     */
    private void initDefaultLinkValidatorManager()
    {
        this.lvm = new LinkValidatorManager();

        if ( getExcludedLinks() != null )
        {
            this.lvm.setExcludedLinks( getExcludedLinks() );
        }

        this.lvm.addLinkValidator( new FileLinkValidator( encoding ) );

        if ( isOnline() )
        {
            OnlineHTTPLinkValidator olv = new OnlineHTTPLinkValidator( http );

            if ( this.baseURL != null )
            {
                olv.setBaseURL( baseURL );
            }

            this.lvm.addLinkValidator( olv );
        }
        else
        {
            this.lvm.addLinkValidator( new OfflineHTTPLinkValidator() );
        }

        this.lvm.addLinkValidator( new MailtoLinkValidator() );
    }

    /**
     * Collects the files below the given base directory that match the include patterns
     * and none of the exclude patterns, then checks each of them and adds it to the model.
     *
     * @param base the base directory to traverse.
     * @param model the model that receives one entry per checked file.
     * @throws IOException if the file listing fails.
     */
    private void findAndCheckFiles( File base, LinkcheckModel model )
        throws IOException
    {
        for ( File file : FileUtils.getFiles( base, getIncludedPages(), getExcludedPages() ) )
        {
            checkFile( file, model );
        }
    }

    /**
     * Checks a single file and appends the outcome to the model.
     *
     * @param file the file whose links should be validated.
     * @param model the model that receives the resulting {@link LinkcheckFile}.
     */
    private void checkFile( File file, LinkcheckModel model )
    {
        if ( LOG.isDebugEnabled() )
        {
            LOG.debug( " File - " + file );
        }

        String basePath = this.basedir.getAbsolutePath();
        String fileRelativePath = file.getAbsolutePath();
        if ( fileRelativePath.startsWith( basePath ) )
        {
            fileRelativePath = fileRelativePath.substring( basePath.length() );

            // Drop the separator between basedir and the relative part only when it is really there:
            // a basedir such as a filesystem root already ends with the separator, and skipping one
            // more character unconditionally would cut off the first letter of the relative path.
            if ( !fileRelativePath.isEmpty() && fileRelativePath.charAt( 0 ) == File.separatorChar )
            {
                fileRelativePath = fileRelativePath.substring( 1 );
            }
        }

        // Relative paths are always reported with forward slashes, whatever the platform.
        fileRelativePath = fileRelativePath.replace( '\\', '/' );

        LinkcheckFile linkcheckFile = new LinkcheckFile();
        linkcheckFile.setAbsolutePath( file.getAbsolutePath() );
        linkcheckFile.setRelativePath( fileRelativePath );

        check( linkcheckFile );

        model.addFile( linkcheckFile );

        if ( ( model.getFiles().size() % 100 == 0 ) && LOG.isInfoEnabled() )
        {
            LOG.info( "Found " + model.getFiles().size() + " files so far." );
        }
    }

    /**
     * Validates a linkcheck file: extracts its links, validates each of them and records one
     * {@link LinkcheckFileResult} per link together with the successful/unsuccessful counters.
     * If the file cannot be parsed, a single <code>PARSE FAILURE</code> result is recorded instead.
     *
     * @param linkcheckFile the linkcheckFile object to validate
     */
    private void check( LinkcheckFile linkcheckFile )
    {
        linkcheckFile.setSuccessful( 0 );
        linkcheckFile.setUnsuccessful( 0 );

        if ( LOG.isDebugEnabled() )
        {
            LOG.debug( "Validating " + linkcheckFile.getRelativePath() );
        }

        final File source = new File( linkcheckFile.getAbsolutePath() );

        final Set<String> hrefs;
        try
        {
            hrefs = LinkMatcher.match( source, encoding );
        }
        catch ( StackOverflowError | IOException t )
        {
            // There is a chance that the domReader will throw
            // a stack overflow exception for some files
            LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" );
            LOG.debug( t.getMessage(), t );

            LinkcheckFileResult lcr = new LinkcheckFileResult();
            lcr.setStatus( "PARSE FAILURE" );
            lcr.setTarget( "N/A" );

            linkcheckFile.addResult( lcr );

            return;
        }

        // Go through the accessor so the manager is guaranteed to exist.
        final LinkValidatorManager validator = getLinkValidatorManager();

        for ( String href : hrefs )
        {
            LinkValidationResult result = validator.validateLink( new LinkValidationItem( source, href ) );

            LinkcheckFileResult lcr = new LinkcheckFileResult();
            lcr.setTarget( href );
            lcr.setErrorMessage( result.getErrorMessage() );

            String status;
            boolean successful;
            switch ( result.getStatus() )
            {
                case LinkcheckFileResult.VALID_LEVEL:
                    // At some point we won't want to store valid links. The tests require that we do at present.
                    status = LinkcheckFileResult.VALID;
                    successful = true;
                    break;
                case LinkcheckFileResult.ERROR_LEVEL:
                    // An error whose HTTP status was explicitly excluded is reported as valid.
                    successful = isExcludedHttpStatus( result, getExcludedHttpStatusErrors() );
                    status = successful ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR;
                    break;
                case LinkcheckFileResult.WARNING_LEVEL:
                    // A warning whose HTTP status was explicitly excluded is reported as valid.
                    successful = isExcludedHttpStatus( result, getExcludedHttpStatusWarnings() );
                    status = successful ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING;
                    break;
                case LinkcheckFileResult.UNKNOWN_LEVEL:
                default:
                    status = LinkcheckFileResult.UNKNOWN;
                    successful = false;
                    break;
            }

            if ( successful )
            {
                linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
            }
            else
            {
                linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
            }

            lcr.setStatus( status );
            linkcheckFile.addResult( lcr );
        }
    }

    /**
     * Tells whether the given validation result carries an HTTP status code that is listed
     * in the given exclusions. Codes are compared as whole numbers, so excluding <code>40</code>
     * does not exclude <code>404</code>.
     *
     * @param result the validation result to inspect.
     * @param excludedStatuses the excluded HTTP status codes, may be <code>null</code>.
     * @return <code>true</code> if the result is an HTTP result with a positive status code
     *         that equals one of the excluded codes, <code>false</code> otherwise.
     */
    private static boolean isExcludedHttpStatus( LinkValidationResult result, int[] excludedStatuses )
    {
        if ( excludedStatuses == null || !( result instanceof HTTPLinkValidationResult ) )
        {
            return false;
        }

        int statusCode = ( (HTTPLinkValidationResult) result ).getHttpStatusCode();
        if ( statusCode <= 0 )
        {
            // No usable HTTP status is attached to this result.
            return false;
        }

        for ( int excluded : excludedStatuses )
        {
            if ( excluded == statusCode )
            {
                return true;
            }
        }

        return false;
    }

    /**
     * Writes some memory data to the log (if debug enabled).
     */
    private void displayMemoryConsumption()
    {
        if ( LOG.isDebugEnabled() )
        {
            Runtime r = Runtime.getRuntime();

            LOG.debug( "Memory: " + ( r.totalMemory() - r.freeMemory() ) / MEG + "M/" + r.totalMemory() / MEG
                + "M" );
        }
    }

    /**
     * Create the XML document from the given model. Does nothing when no report output file
     * has been configured; otherwise missing parent directories are created first.
     *
     * @param model the model to serialize.
     * @throws IOException if the parent directory cannot be created or the document cannot be written.
     */
    private void createDocument( LinkcheckModel model )
        throws IOException
    {
        if ( this.reportOutput == null )
        {
            return;
        }

        File dir = this.reportOutput.getParentFile();
        if ( dir != null && !dir.exists() && !dir.mkdirs() )
        {
            throw new IOException( "Could not create " + dir );
        }

        LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer();
        try ( Writer writer = WriterFactory.newXmlWriter( this.reportOutput ) )
        {
            xpp3Writer.write( writer, model );
        }
        catch ( IllegalStateException e )
        {
            // Surface the failure as an IOException so the caller's error handling applies to it.
            throw new IOException( e.getMessage() + " Maybe try to specify another encoding instead of '"
                + encoding + "'.", e );
        }
    }
}