| package org.apache.maven.doxia.parser; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import java.io.BufferedReader; |
| import java.io.ByteArrayInputStream; |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.net.URL; |
| import java.util.Hashtable; |
| import java.util.LinkedHashMap; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.apache.http.HttpEntity; |
| import org.apache.http.HttpResponse; |
| import org.apache.http.HttpStatus; |
| import org.apache.http.client.ClientProtocolException; |
| import org.apache.http.client.HttpRequestRetryHandler; |
| import org.apache.http.client.methods.HttpGet; |
| import org.apache.http.impl.client.DefaultHttpClient; |
| import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; |
| import org.apache.http.util.EntityUtils; |
| |
| import org.apache.maven.doxia.macro.MacroExecutionException; |
| import org.apache.maven.doxia.markup.XmlMarkup; |
| import org.apache.maven.doxia.sink.Sink; |
| import org.apache.maven.doxia.sink.SinkEventAttributeSet; |
| import org.apache.maven.doxia.util.HtmlTools; |
| import org.apache.maven.doxia.util.XmlValidator; |
| |
| import org.codehaus.plexus.util.FileUtils; |
| import org.codehaus.plexus.util.IOUtil; |
| import org.codehaus.plexus.util.StringUtils; |
| import org.codehaus.plexus.util.xml.pull.MXParser; |
| import org.codehaus.plexus.util.xml.pull.XmlPullParser; |
| import org.codehaus.plexus.util.xml.pull.XmlPullParserException; |
| |
| import org.xml.sax.EntityResolver; |
| import org.xml.sax.InputSource; |
| import org.xml.sax.SAXException; |
| |
| /** |
| * An abstract class that defines some convenience methods for <code>XML</code> parsers. |
| * |
| * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a> |
| * @version $Id$ |
| * @since 1.0 |
| */ |
| public abstract class AbstractXmlParser |
| extends AbstractParser |
| implements XmlMarkup |
| { |
| /** |
| * Entity pattern for HTML entity, i.e. &nbsp; |
| * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*> |
| * <br/> |
| * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>. |
| */ |
| private static final Pattern PATTERN_ENTITY_1 = |
| Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" ); |
| |
| /** |
| * Entity pattern for Unicode entity, i.e. &#38; |
| * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" |
| * <br/> |
| * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>. |
| */ |
| private static final Pattern PATTERN_ENTITY_2 = |
| Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" ); |
| |
| private boolean ignorableWhitespace; |
| |
| private boolean collapsibleWhitespace; |
| |
| private boolean trimmableWhitespace; |
| |
| private Map<String, String> entities; |
| |
| private boolean validate = false; |
| |
| /** {@inheritDoc} */ |
| public void parse( Reader source, Sink sink ) |
| throws ParseException |
| { |
| init(); |
| |
| Reader src = source; |
| |
| // 1 first parsing if validation is required |
| if ( isValidate() ) |
| { |
| String content; |
| try |
| { |
| content = IOUtil.toString( new BufferedReader( src ) ); |
| } |
| catch ( IOException e ) |
| { |
| throw new ParseException( "Error reading the model: " + e.getMessage(), e ); |
| } |
| |
| new XmlValidator( getLog() ).validate( content ); |
| |
| src = new StringReader( content ); |
| } |
| |
| // 2 second parsing to process |
| try |
| { |
| XmlPullParser parser = new MXParser(); |
| |
| parser.setInput( src ); |
| |
| // allow parser initialization, e.g. for additional entities in XHTML |
| // Note: do it after input is set, otherwise values are reset |
| initXmlParser(parser); |
| |
| sink.enableLogging( getLog() ); |
| |
| parseXml( parser, sink ); |
| } |
| catch ( XmlPullParserException ex ) |
| { |
| throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(), |
| ex.getColumnNumber() ); |
| } |
| catch ( MacroExecutionException ex ) |
| { |
| throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex ); |
| } |
| |
| setSecondParsing( false ); |
| init(); |
| } |
| |
| /** |
| * Initializes the parser with custom entities or other options. |
| * |
| * @param parser A parser, not null. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser |
| */ |
| protected void initXmlParser( XmlPullParser parser ) |
| throws XmlPullParserException |
| { |
| // nop |
| } |
| |
| /** |
| * {@inheritDoc} |
| * |
| * Convenience method to parse an arbitrary string and emit any xml events into the given sink. |
| */ |
| @Override |
| public void parse( String string, Sink sink ) |
| throws ParseException |
| { |
| super.parse( string, sink ); |
| } |
| |
| /** {@inheritDoc} */ |
| @Override |
| public final int getType() |
| { |
| return XML_TYPE; |
| } |
| |
| /** |
| * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet. |
| * |
| * @param parser A parser, not null. |
| * @return a SinkEventAttributeSet or null if the current parser event is not a start tag. |
| * @since 1.1 |
| */ |
| protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser ) |
| { |
| int count = parser.getAttributeCount(); |
| |
| if ( count < 0 ) |
| { |
| return null; |
| } |
| |
| SinkEventAttributeSet atts = new SinkEventAttributeSet( count ); |
| |
| for ( int i = 0; i < count; i++ ) |
| { |
| atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) ); |
| } |
| |
| return atts; |
| } |
| |
| /** |
| * Parse the model from the XmlPullParser into the given sink. |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro |
| */ |
| private void parseXml( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException, MacroExecutionException |
| { |
| int eventType = parser.getEventType(); |
| |
| while ( eventType != XmlPullParser.END_DOCUMENT ) |
| { |
| if ( eventType == XmlPullParser.START_TAG ) |
| { |
| handleStartTag( parser, sink ); |
| } |
| else if ( eventType == XmlPullParser.END_TAG ) |
| { |
| handleEndTag( parser, sink ); |
| } |
| else if ( eventType == XmlPullParser.TEXT ) |
| { |
| String text = getText( parser ); |
| |
| if ( isIgnorableWhitespace() ) |
| { |
| if ( text.trim().length() != 0 ) |
| { |
| handleText( parser, sink ); |
| } |
| } |
| else |
| { |
| handleText( parser, sink ); |
| } |
| } |
| else if ( eventType == XmlPullParser.CDSECT ) |
| { |
| handleCdsect( parser, sink ); |
| } |
| else if ( eventType == XmlPullParser.COMMENT ) |
| { |
| handleComment( parser, sink ); |
| } |
| else if ( eventType == XmlPullParser.ENTITY_REF ) |
| { |
| handleEntity( parser, sink ); |
| } |
| else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE ) |
| { |
| // nop |
| } |
| else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION ) |
| { |
| // nop |
| } |
| else if ( eventType == XmlPullParser.DOCDECL ) |
| { |
| addLocalEntities( parser, parser.getText() ); |
| |
| for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() ) |
| { |
| addDTDEntities( parser, new String( res ) ); |
| } |
| } |
| |
| try |
| { |
| eventType = parser.nextToken(); |
| } |
| catch ( IOException io ) |
| { |
| throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io ); |
| } |
| } |
| } |
| |
| /** |
| * Goes through the possible start tags. |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro |
| */ |
| protected abstract void handleStartTag( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException, MacroExecutionException; |
| |
| /** |
| * Goes through the possible end tags. |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro |
| */ |
| protected abstract void handleEndTag( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException, MacroExecutionException; |
| |
| /** |
| * Handles text events. |
| * |
| * <p>This is a default implementation, if the parser points to a non-empty text element, |
| * it is emitted as a text event into the specified sink.</p> |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. Not null. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| */ |
| protected void handleText( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException |
| { |
| String text = getText( parser ); |
| |
| /* |
| * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the |
| * parser so any whitespace that makes it here is significant. |
| */ |
| if ( StringUtils.isNotEmpty( text ) ) |
| { |
| sink.text( text ); |
| } |
| } |
| |
| /** |
| * Handles CDATA sections. |
| * |
| * <p>This is a default implementation, all data are emitted as text |
| * events into the specified sink.</p> |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. Not null. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| */ |
| protected void handleCdsect( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException |
| { |
| sink.text( getText( parser ) ); |
| } |
| |
| /** |
| * Handles comments. |
| * |
| * <p>This is a default implementation, all data are emitted as comment |
| * events into the specified sink.</p> |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. Not null. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| */ |
| protected void handleComment( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException |
| { |
| sink.comment( getText( parser ).trim() ); |
| } |
| |
| /** |
| * Handles entities. |
| * |
| * <p>This is a default implementation, all entities are resolved and emitted as text |
| * events into the specified sink, except:</p> |
| * <ul> |
| * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code> |
| * are emitted as <code>nonBreakingSpace()</code> events.</li> |
| * </ul> |
| * |
| * @param parser A parser, not null. |
| * @param sink the sink to receive the events. Not null. |
| * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model |
| */ |
| protected void handleEntity( XmlPullParser parser, Sink sink ) |
| throws XmlPullParserException |
| { |
| String text = getText( parser ); |
| |
| String name = parser.getName(); |
| |
| if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) ) |
| { |
| sink.nonBreakingSpace(); |
| } |
| else |
| { |
| String unescaped = HtmlTools.unescapeHTML( text ); |
| |
| sink.text( unescaped ); |
| } |
| } |
| |
| /** |
| * Handles an unknown event. |
| * |
| * <p>This is a default implementation, all events are emitted as unknown |
| * events into the specified sink.</p> |
| * |
| * @param parser the parser to get the event from. |
| * @param sink the sink to receive the event. |
| * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE, |
| * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE. |
| * It will be passed as the first argument of the required parameters to the Sink |
| * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)} |
| * method. |
| */ |
| protected void handleUnknown( XmlPullParser parser, Sink sink, int type ) |
| { |
| Object[] required = new Object[] { Integer.valueOf( type ) }; |
| |
| SinkEventAttributeSet attribs = getAttributesFromParser( parser ); |
| |
| sink.unknown( parser.getName(), required, attribs ); |
| } |
| |
| /** |
| * <p>isIgnorableWhitespace.</p> |
| * |
| * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise. |
| * @see #setIgnorableWhitespace(boolean) |
| * @since 1.1 |
| */ |
| protected boolean isIgnorableWhitespace() |
| { |
| return ignorableWhitespace; |
| } |
| |
| /** |
| * Specify that whitespace will be ignored. I.e.: |
| * <pre><tr> <td/> </tr></pre> |
| * is equivalent to |
| * <pre><tr><td/></tr></pre> |
| * |
| * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise. |
| * @since 1.1 |
| */ |
| protected void setIgnorableWhitespace( boolean ignorable ) |
| { |
| this.ignorableWhitespace = ignorable; |
| } |
| |
| /** |
| * <p>isCollapsibleWhitespace.</p> |
| * |
| * @return <code>true</code> if text will collapse, <code>false</code> otherwise. |
| * @see #setCollapsibleWhitespace(boolean) |
| * @since 1.1 |
| */ |
| protected boolean isCollapsibleWhitespace() |
| { |
| return collapsibleWhitespace; |
| } |
| |
| /** |
| * Specify that text will be collapsed. I.e.: |
| * <pre>Text Text</pre> |
| * is equivalent to |
| * <pre>Text Text</pre> |
| * |
| * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise. |
| * @since 1.1 |
| */ |
| protected void setCollapsibleWhitespace( boolean collapsible ) |
| { |
| this.collapsibleWhitespace = collapsible; |
| } |
| |
| /** |
| * <p>isTrimmableWhitespace.</p> |
| * |
| * @return <code>true</code> if text will be trim, <code>false</code> otherwise. |
| * @see #setTrimmableWhitespace(boolean) |
| * @since 1.1 |
| */ |
| protected boolean isTrimmableWhitespace() |
| { |
| return trimmableWhitespace; |
| } |
| |
| /** |
| * Specify that text will be collapsed. I.e.: |
| * <pre><p> Text </p></pre> |
| * is equivalent to |
| * <pre><p>Text</p></pre> |
| * |
| * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise. |
| * @since 1.1 |
| */ |
| protected void setTrimmableWhitespace( boolean trimmable ) |
| { |
| this.trimmableWhitespace = trimmable; |
| } |
| |
| /** |
| * <p>getText.</p> |
| * |
| * @param parser A parser, not null. |
| * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration. |
| * @see XmlPullParser#getText() |
| * @see #isCollapsibleWhitespace() |
| * @see #isTrimmableWhitespace() |
| * @since 1.1 |
| */ |
| protected String getText( XmlPullParser parser ) |
| { |
| String text = parser.getText(); |
| |
| if ( isTrimmableWhitespace() ) |
| { |
| text = text.trim(); |
| } |
| |
| if ( isCollapsibleWhitespace() ) |
| { |
| StringBuilder newText = new StringBuilder(); |
| String[] elts = StringUtils.split( text, " \r\n" ); |
| for ( int i = 0; i < elts.length; i++ ) |
| { |
| newText.append( elts[i] ); |
| if ( ( i + 1 ) < elts.length ) |
| { |
| newText.append( " " ); |
| } |
| } |
| text = newText.toString(); |
| } |
| |
| return text; |
| } |
| |
| /** |
| * Return the defined entities in a local doctype. I.e.: |
| * <pre> |
| * <!DOCTYPE foo [ |
| * <!ENTITY bar "&#x160;"> |
| * <!ENTITY bar1 "&#x161;"> |
| * ]> |
| * </pre> |
| * |
| * @return a map of the defined entities in a local doctype. |
| * @since 1.1 |
| */ |
| protected Map<String, String> getLocalEntities() |
| { |
| if ( entities == null ) |
| { |
| entities = new LinkedHashMap<String, String>(); |
| } |
| |
| return entities; |
| } |
| |
| /** |
| * <p>isValidate.</p> |
| * |
| * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise. |
| * @since 1.1 |
| */ |
| public boolean isValidate() |
| { |
| return validate; |
| } |
| |
| /** |
| * Specify a flag to validate or not the XML content. |
| * |
| * @param validate the validate to set |
| * @see #parse(Reader, Sink) |
| * @since 1.1 |
| */ |
| public void setValidate( boolean validate ) |
| { |
| this.validate = validate; |
| } |
| |
| // ---------------------------------------------------------------------- |
| // Private methods |
| // ---------------------------------------------------------------------- |
| |
| /** |
| * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}. |
| * <br/> |
| * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;. |
| * |
| * @param parser not null |
| * @param entityName not null |
| * @param entityValue not null |
| * @throws XmlPullParserException if any |
| * @see {@link XmlPullParser#defineEntityReplacementText(String, String)} |
| */ |
| private void addEntity( XmlPullParser parser, String entityName, String entityValue ) |
| throws XmlPullParserException |
| { |
| if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" ) |
| || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) ) |
| { |
| return; |
| } |
| |
| parser.defineEntityReplacementText( entityName, entityValue ); |
| getLocalEntities().put( entityName, entityValue ); |
| } |
| |
| /** |
| * Handle entities defined in a local doctype as the following: |
| * <pre> |
| * <!DOCTYPE foo [ |
| * <!ENTITY bar "&#x160;"> |
| * <!ENTITY bar1 "&#x161;"> |
| * ]> |
| * </pre> |
| * |
| * @param parser not null |
| * @param text not null |
| * @throws XmlPullParserException if any |
| */ |
| private void addLocalEntities( XmlPullParser parser, String text ) |
| throws XmlPullParserException |
| { |
| int entitiesCount = StringUtils.countMatches( text, ENTITY_START ); |
| if ( entitiesCount > 0 ) |
| { |
| // text should be foo [...] |
| int start = text.indexOf( '['); |
| int end = text.lastIndexOf( ']'); |
| if ( start != -1 && end != -1 ) |
| { |
| addDTDEntities( parser, text.substring( start + 1, end ) ); |
| } |
| } |
| } |
| |
| /** |
| * Handle entities defined in external doctypes as the following: |
| * <pre> |
| * <!DOCTYPE foo [ |
| * <!-- These are the entity sets for ISO Latin 1 characters for the XHTML --> |
| * <!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" |
| * "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"> |
| * %HTMLlat1; |
| * ]> |
| * </pre> |
| * |
| * @param parser not null |
| * @param text not null |
| * @throws XmlPullParserException if any |
| */ |
| private void addDTDEntities( XmlPullParser parser, String text ) |
| throws XmlPullParserException |
| { |
| int entitiesCount = StringUtils.countMatches( text, ENTITY_START ); |
| if ( entitiesCount > 0 ) |
| { |
| final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START ); |
| BufferedReader reader = new BufferedReader( new StringReader( txt ) ); |
| String line; |
| String tmpLine = ""; |
| try |
| { |
| Matcher matcher; |
| while ( ( line = reader.readLine() ) != null ) |
| { |
| tmpLine += "\n" + line; |
| matcher = PATTERN_ENTITY_1.matcher( tmpLine ); |
| if ( matcher.find() && matcher.groupCount() == 7 ) |
| { |
| String entityName = matcher.group( 2 ); |
| String entityValue = matcher.group( 5 ); |
| |
| addEntity( parser, entityName, entityValue ); |
| tmpLine = ""; |
| } |
| else |
| { |
| matcher = PATTERN_ENTITY_2.matcher( tmpLine ); |
| if ( matcher.find() && matcher.groupCount() == 8 ) |
| { |
| String entityName = matcher.group( 2 ); |
| String entityValue = matcher.group( 5 ); |
| |
| addEntity( parser, entityName, entityValue ); |
| tmpLine = ""; |
| } |
| } |
| } |
| } |
| catch ( IOException e ) |
| { |
| // nop |
| } |
| finally |
| { |
| IOUtil.close( reader ); |
| } |
| } |
| } |
| |
| /** |
| * Implementation of the callback mechanism <code>EntityResolver</code>. |
| * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>. |
| */ |
| public static class CachedFileEntityResolver |
| implements EntityResolver |
| { |
| /** Map with systemId as key and the content of systemId as byte[]. */ |
| protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>(); |
| |
| /** {@inheritDoc} */ |
| public InputSource resolveEntity( String publicId, String systemId ) |
| throws SAXException, IOException |
| { |
| byte[] res = ENTITY_CACHE.get( systemId ); |
| // already cached? |
| if ( res == null ) |
| { |
| String systemName = FileUtils.getFile( systemId ).getName(); |
| File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName ); |
| // maybe already as a temp file? |
| if ( !temp.exists() ) |
| { |
| // is systemId a file or an url? |
| if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) ) |
| { |
| // Doxia XSDs are included in the jars, so try to find the resource systemName from |
| // the classpath... |
| String resource = "/" + systemName; |
| URL url = getClass().getResource( resource ); |
| if ( url != null ) |
| { |
| res = toByteArray( url ); |
| } |
| else |
| { |
| throw new SAXException( "Could not find the SYSTEM entity: " + systemId |
| + " because '" + resource + "' is not available of the classpath." ); |
| } |
| } |
| else |
| { |
| res = toByteArray( new URL( systemId ) ); |
| } |
| |
| // write systemId as temp file |
| copy( res, temp ); |
| } |
| else |
| { |
| // TODO How to refresh Doxia XSDs from temp dir? |
| res = toByteArray( temp.toURI().toURL() ); |
| } |
| |
| ENTITY_CACHE.put( systemId, res ); |
| } |
| |
| InputSource is = new InputSource( new ByteArrayInputStream( res ) ); |
| is.setPublicId( publicId ); |
| is.setSystemId( systemId ); |
| |
| return is; |
| } |
| |
| /** |
| * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url |
| * content. |
| * Otherwise, use HttpClient to get the http content. |
| * Wrap all internal exceptions to throw SAXException. |
| * |
| * @param url not null |
| * @return return an array of byte |
| * @throws SAXException if any |
| */ |
| private static byte[] toByteArray( URL url ) |
| throws SAXException |
| { |
| if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) ) |
| { |
| InputStream is = null; |
| try |
| { |
| is = url.openStream(); |
| if ( is == null ) |
| { |
| throw new SAXException( "Cannot open stream from the url: " + url.toString() ); |
| } |
| return IOUtil.toByteArray( is ); |
| } |
| catch ( IOException e ) |
| { |
| throw new SAXException( "IOException: " + e.getMessage(), e ); |
| } |
| finally |
| { |
| IOUtil.close( is ); |
| } |
| } |
| |
| // it is an HTTP url, using HttpClient... |
| DefaultHttpClient client = new DefaultHttpClient(); |
| HttpGet method = new HttpGet( url.toString() ); |
| // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C |
| // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)" |
| method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() ); |
| |
| HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false ); |
| client.setHttpRequestRetryHandler( retryHandler ); |
| |
| HttpEntity entity = null; |
| try |
| { |
| HttpResponse response = client.execute( method ); |
| int statusCode = response.getStatusLine().getStatusCode(); |
| if ( statusCode != HttpStatus.SC_OK ) |
| { |
| throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was " |
| + statusCode + ", which is not allowed. The server gave this reason for the failure '" |
| + response.getStatusLine().getReasonPhrase() + "'." ); |
| } |
| |
| entity = response.getEntity(); |
| return EntityUtils.toByteArray( entity ); |
| } |
| catch ( ClientProtocolException e ) |
| { |
| throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e ); |
| } |
| catch ( IOException e ) |
| { |
| throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e ); |
| } |
| finally |
| { |
| if ( entity != null ) |
| { |
| try |
| { |
| entity.consumeContent(); |
| } |
| catch ( IOException e ) |
| { |
| // Ignore |
| } |
| } |
| } |
| } |
| |
| /** |
| * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException. |
| * |
| * @param res not null array of byte |
| * @param f the file where to write the bytes |
| * @throws SAXException if any |
| * @see {@link IOUtil#copy(byte[], OutputStream)} |
| */ |
| private void copy( byte[] res, File f ) |
| throws SAXException |
| { |
| if ( f.isDirectory() ) |
| { |
| throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." ); |
| } |
| |
| OutputStream os = null; |
| try |
| { |
| os = new FileOutputStream( f ); |
| IOUtil.copy( res, os ); |
| } |
| catch ( IOException e ) |
| { |
| throw new SAXException( "IOException: " + e.getMessage(), e ); |
| } |
| finally |
| { |
| IOUtil.close( os ); |
| } |
| } |
| } |
| } |