// blob: aad60f5276d9085c6cdc76ebcf8d0217027dc21b [file] [log] [blame]
package org.apache.maven.archetype.common.util;
/*
* Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions, and the disclaimer that follows
* these conditions in the documentation and/or other materials
* provided with the distribution.
*
* 3. The name "JDOM" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact <request_AT_jdom_DOT_org>.
*
* 4. Products derived from this software may not be called "JDOM", nor
* may "JDOM" appear in their name, without prior written permission
* from the JDOM Project Management <request_AT_jdom_DOT_org>.
*
* In addition, we request (but do not require) that you include in the
* end-user documentation provided with the redistribution and/or in the
* software itself an acknowledgement equivalent to the following:
* "This product includes software developed by the
* JDOM Project (http://www.jdom.org/)."
* Alternatively, the acknowledgment may be graphical using the logos
* available at http://www.jdom.org/images/logos.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the JDOM Project and was originally
* created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
* Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
* on the JDOM Project, please see <http://www.jdom.org/>.
*/
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;

import org.jdom.output.EscapeStrategy;
/**
 * <p>Class to encapsulate XMLOutputter format options.
 * Typical users can use the standard format configurations obtained by
 * {@link #getRawFormat} (no whitespace changes),
 * {@link #getPrettyFormat} (whitespace beautification), and
 * {@link #getCompactFormat} (whitespace normalization).</p>
 *
 * <p>Several modes are available to affect the way textual content is printed.
 * See the documentation for {@link TextMode} for details.</p>
 *
 * <p>Instances are mutable; every setter except
 * {@link #setIgnoreTrAXEscapingPIs} returns <code>this</code> so calls can be
 * chained.</p>
 *
 * @author Jason Hunter
 */
public class Format
    implements Cloneable
{
    /** Standard value to indent by, if we are indenting (two spaces). */
    private static final String STANDARD_INDENT = "  ";

    /** Standard string with which to end a line. */
    private static final String STANDARD_LINE_SEPARATOR = "\r\n";

    /** Standard encoding. */
    private static final String STANDARD_ENCODING = "UTF-8";

    /** The indent string; <code>null</code> (the default) means no indentation. */
    String indent = null;

    /** New line separator. */
    String lineSeparator = STANDARD_LINE_SEPARATOR;

    /** The encoding format. */
    String encoding = STANDARD_ENCODING;

    /**
     * Whether or not to omit the XML declaration
     * - default is <code>false</code> (the declaration is written).
     */
    boolean omitDeclaration = false;

    /**
     * Whether or not to omit the encoding in the XML declaration
     * - default is <code>false</code> (the encoding is written).
     */
    boolean omitEncoding = false;

    /**
     * Whether or not to expand empty elements to
     * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code>.
     */
    boolean expandEmptyElements = false;

    /**
     * Whether TrAX output escaping disabling/enabling PIs are ignored
     * or processed - default is <code>false</code>.
     */
    boolean ignoreTrAXEscapingPIs = false;

    /** Text handling mode. */
    TextMode mode = TextMode.PRESERVE;

    /** Entity escape logic; initially derived from the default encoding. */
    EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding );

    /** Creates a new Format instance with default (raw) behavior. */
    private Format()
    {
    }

    /**
     * Returns a new Format object that performs no whitespace changes, uses
     * the UTF-8 encoding, doesn't expand empty elements, includes the
     * declaration and encoding, and uses the default entity escape strategy.
     * Tweaks can be made to the returned Format instance without affecting
     * other instances.
     *
     * @return a Format with no whitespace changes
     */
    public static Format getRawFormat()
    {
        return new Format();
    }

    /**
     * Returns a new Format object that performs whitespace beautification with
     * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
     * includes the declaration and encoding, and uses the default entity
     * escape strategy.
     * Tweaks can be made to the returned Format instance without affecting
     * other instances.
     *
     * @return a Format with whitespace beautification
     */
    public static Format getPrettyFormat()
    {
        Format f = new Format();
        f.setIndent( STANDARD_INDENT );
        f.setTextMode( TextMode.TRIM );
        return f;
    }

    /**
     * Returns a new Format object that performs whitespace normalization, uses
     * the UTF-8 encoding, doesn't expand empty elements, includes the
     * declaration and encoding, and uses the default entity escape strategy.
     * Tweaks can be made to the returned Format instance without affecting
     * other instances.
     *
     * @return a Format with whitespace normalization
     */
    public static Format getCompactFormat()
    {
        Format f = new Format();
        f.setTextMode( TextMode.NORMALIZE );
        return f;
    }

    /**
     * Sets the {@link EscapeStrategy} to use for character escaping.
     * Note that a later call to {@link #setEncoding} replaces the strategy
     * set here with a new default one for that encoding.
     *
     * @param strategy the EscapeStrategy to use
     * @return a pointer to this Format for chaining
     */
    public Format setEscapeStrategy( EscapeStrategy strategy )
    {
        escapeStrategy = strategy;
        return this;
    }

    /**
     * Returns the current escape strategy.
     *
     * @return the current escape strategy
     */
    public EscapeStrategy getEscapeStrategy()
    {
        return escapeStrategy;
    }

    /**
     * <p>This will set the newline separator (<code>lineSeparator</code>).
     * The default is <code>\r\n</code>. To make it output
     * the system default line ending string, call
     * <code>setLineSeparator(System.getProperty("line.separator"))</code></p>
     *
     * <p>To output "UNIX-style" documents, call
     * <code>setLineSeparator("\n")</code>. To output "Mac-style"
     * documents, call <code>setLineSeparator("\r")</code>. DOS-style
     * documents use CR-LF ("\r\n"), which is the default.</p>
     *
     * <p>Note that this only applies to newlines generated by the
     * outputter. If you parse an XML document that contains newlines
     * embedded inside a text node, and you do not set TextMode.NORMALIZE,
     * then the newlines will be output
     * verbatim, as "\n" which is how parsers normalize them.
     * </p>
     *
     * @param separator <code>String</code> line separator to use.
     * @return a pointer to this Format for chaining
     * @see #setTextMode
     */
    public Format setLineSeparator( String separator )
    {
        this.lineSeparator = separator;
        return this;
    }

    /**
     * Returns the current line separator.
     *
     * @return the current line separator
     */
    public String getLineSeparator()
    {
        return lineSeparator;
    }

    /**
     * This will set whether the XML declaration
     * (<code>&lt;&#063;xml version="1&#046;0"
     * encoding="UTF-8"&#063;&gt;</code>)
     * includes the encoding of the document. It is common to omit
     * this in uses such as WML and other wireless device protocols.
     *
     * @param omitEncoding <code>boolean</code> indicating whether or not
     *                     the encoding should be left out of the XML declaration.
     * @return a pointer to this Format for chaining
     */
    public Format setOmitEncoding( boolean omitEncoding )
    {
        this.omitEncoding = omitEncoding;
        return this;
    }

    /**
     * Returns whether the XML declaration encoding will be omitted.
     *
     * @return whether the XML declaration encoding will be omitted
     */
    public boolean getOmitEncoding()
    {
        return omitEncoding;
    }

    /**
     * This will set whether the XML declaration
     * (<code>&lt;&#063;xml version="1&#046;0"&#063;&gt;</code>)
     * will be omitted or not. It is common to omit this in uses such
     * as SOAP and XML-RPC calls.
     *
     * @param omitDeclaration <code>boolean</code> indicating whether or not
     *                        the XML declaration should be omitted.
     * @return a pointer to this Format for chaining
     */
    public Format setOmitDeclaration( boolean omitDeclaration )
    {
        this.omitDeclaration = omitDeclaration;
        return this;
    }

    /**
     * Returns whether the XML declaration will be omitted.
     *
     * @return whether the XML declaration will be omitted
     */
    public boolean getOmitDeclaration()
    {
        return omitDeclaration;
    }

    /**
     * This will set whether empty elements are expanded from
     * <code>&lt;tagName/&gt;</code> to
     * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
     *
     * @param expandEmptyElements <code>boolean</code> indicating whether or not
     *                            empty elements should be expanded.
     * @return a pointer to this Format for chaining
     */
    public Format setExpandEmptyElements( boolean expandEmptyElements )
    {
        this.expandEmptyElements = expandEmptyElements;
        return this;
    }

    /**
     * Returns whether empty elements are expanded.
     *
     * @return whether empty elements are expanded
     */
    public boolean getExpandEmptyElements()
    {
        return expandEmptyElements;
    }

    /**
     * <p>This will set whether JAXP TrAX processing instructions for
     * disabling/enabling output escaping are ignored. Disabling
     * output escaping allows using XML text as element content and
     * outputting it verbatim, i&#46;e&#46; as element children would be.</p>
     *
     * <p>When processed, these processing instructions are removed from
     * the generated XML text and control whether the element text
     * content is output verbatim or with escaping of the pre-defined
     * entities in XML 1.0. The text to be output verbatim shall be
     * surrounded by the
     * <code>&lt;?javax.xml.transform.disable-output-escaping ?&gt;</code>
     * and <code>&lt;?javax.xml.transform.enable-output-escaping ?&gt;</code>
     * PIs.</p>
     *
     * <p>When ignored, the processing instructions are present in the
     * generated XML text and the pre-defined entities in XML 1.0 are
     * escaped.</p>
     *
     * <p>Default: <code>false</code>.</p>
     *
     * <p>Unlike the other setters of this class, this method returns nothing
     * and therefore cannot be chained.</p>
     *
     * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
     *                              whether or not TrAX output escaping PIs are ignored.
     * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
     * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
     */
    public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs )
    {
        this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
    }

    /**
     * Returns whether JAXP TrAX processing instructions for
     * disabling/enabling output escaping are ignored.
     *
     * @return whether or not TrAX output escaping PIs are ignored.
     */
    public boolean getIgnoreTrAXEscapingPIs()
    {
        return ignoreTrAXEscapingPIs;
    }

    /**
     * This sets the text output style. Options are available as static
     * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
     *
     * @param mode the text output style to use
     * @return a pointer to this Format for chaining
     */
    public Format setTextMode( Format.TextMode mode )
    {
        this.mode = mode;
        return this;
    }

    /**
     * Returns the current text output style.
     *
     * @return the current text output style
     */
    public Format.TextMode getTextMode()
    {
        return mode;
    }

    /**
     * This will set the indent <code>String</code> to use; this
     * is usually a <code>String</code> of empty spaces. If you pass
     * null, or the empty string (""), then no indentation will
     * happen. Default: none (null)
     *
     * @param indent <code>String</code> to use for indentation.
     * @return a pointer to this Format for chaining
     */
    public Format setIndent( String indent )
    {
        // The empty string is stored as null so that later code only has to
        // compare against null instead of calling equals().
        this.indent = "".equals( indent ) ? null : indent;
        return this;
    }

    /**
     * Returns the indent string in use.
     *
     * @return the indent string in use, or <code>null</code> if none
     */
    public String getIndent()
    {
        return indent;
    }

    /**
     * Sets the output encoding. The name should be an accepted XML
     * encoding. This also replaces the current escape strategy with a new
     * default strategy for the given encoding, discarding any strategy
     * previously installed through {@link #setEscapeStrategy}.
     *
     * @param encoding the encoding format. Use XML-style names like
     *                 "UTF-8" or "ISO-8859-1" or "US-ASCII"
     * @return a pointer to this Format for chaining
     */
    public Format setEncoding( String encoding )
    {
        this.encoding = encoding;
        escapeStrategy = new DefaultEscapeStrategy( encoding );
        return this;
    }

    /**
     * Returns the configured output encoding.
     *
     * @return the output encoding
     */
    public String getEncoding()
    {
        return encoding;
    }

    /**
     * Creates a shallow copy of this Format. All option values are copied;
     * the escape strategy object itself is shared between the original and
     * the copy.
     *
     * @return the copied Format, never <code>null</code>
     * @throws IllegalStateException if cloning fails, which should not be
     *                               possible because this class implements
     *                               {@link Cloneable}
     */
    @Override
    protected Object clone()
    {
        try
        {
            return super.clone();
        }
        catch ( CloneNotSupportedException ce )
        {
            // Previously swallowed, which made this method return null and
            // pushed the failure to an unrelated NullPointerException later.
            throw new IllegalStateException( "Format implements Cloneable but could not be cloned", ce );
        }
    }

    /**
     * Looks up an encoder for the named charset.
     *
     * @param encoding the charset name, may be <code>null</code> or unknown
     * @return a new encoder, or <code>null</code> if the name is illegal,
     *         unsupported, or the charset cannot encode
     */
    private static CharsetEncoder newEncoderOrNull( String encoding )
    {
        try
        {
            return Charset.forName( encoding ).newEncoder();
        }
        catch ( RuntimeException ignored )
        {
            // Deliberately best-effort: an illegal/unsupported charset name
            // (or a decode-only charset) simply means "no special escaping".
            return null;
        }
    }

    /**
     * Handle common charsets quickly and easily, and fall back to a
     * {@link CharsetEncoder} for all other charsets. If no encoder can be
     * obtained for the charset, nothing is escaped.
     */
    // NOTE(review): this class uses no state of the enclosing Format and could
    // be declared static; left as an inner class to keep its signature as-is.
    class DefaultEscapeStrategy
        implements EscapeStrategy
    {
        /** 16, 8 or 7 for the well-known charsets, 0 when the encoder decides. */
        private final int bits;

        /** Encoder consulted when <code>bits</code> is 0; may be <code>null</code>. */
        private final CharsetEncoder encoder;

        /**
         * Creates a strategy for the given encoding name.
         *
         * @param encoding the output encoding name (compared case-insensitively
         *                 against the well-known names)
         */
        public DefaultEscapeStrategy( String encoding )
        {
            if ( "UTF-8".equalsIgnoreCase( encoding )
                || "UTF-16".equalsIgnoreCase( encoding ) )
            {
                bits = 16;
                encoder = null;
            }
            else if ( "ISO-8859-1".equalsIgnoreCase( encoding )
                || "Latin1".equalsIgnoreCase( encoding ) )
            {
                bits = 8;
                encoder = null;
            }
            else if ( "US-ASCII".equalsIgnoreCase( encoding )
                || "ASCII".equalsIgnoreCase( encoding ) )
            {
                bits = 7;
                encoder = null;
            }
            else
            {
                bits = 0;
                encoder = newEncoderOrNull( encoding );
            }
        }

        /**
         * Tells whether the given character cannot be represented in the
         * configured encoding and therefore has to be escaped.
         *
         * @param ch the character to test
         * @return <code>true</code> if the character should be escaped
         */
        @Override
        public boolean shouldEscape( char ch )
        {
            if ( bits == 16 )
            {
                return false;
            }
            if ( bits == 8 )
            {
                return ch > 255;
            }
            if ( bits == 7 )
            {
                return ch > 127;
            }
            if ( encoder != null )
            {
                try
                {
                    return !encoder.canEncode( ch );
                }
                catch ( RuntimeException ignored )
                {
                    // Best-effort: treat an encoder failure as "don't know"
                    // and fall through to the default answer below.
                }
            }
            // Return false if we don't know. This risks not escaping
            // things which should be escaped, but also means people won't
            // start getting loads of unnecessary escapes.
            return false;
        }
    }

    /**
     * <p>
     * Class to signify how text should be handled on output. The following
     * table provides details.</p>
     * <table>
     * <caption>TextMode details</caption>
     * <tr>
     * <th align="left">
     * Text Mode
     * </th>
     * <th>
     * Resulting behavior.
     * </th>
     * </tr>
     * <tr valign="top">
     * <td>
     * <i>PRESERVE (Default)</i>
     * </td>
     * <td>
     * All content is printed in the format it was created, no whitespace
     * or line separators are added or removed.
     * </td>
     * </tr>
     * <tr valign="top">
     * <td>
     * TRIM_FULL_WHITE
     * </td>
     * <td>
     * Content between tags consisting of all whitespace is not printed.
     * If the content contains even one non-whitespace character, it is
     * printed verbatim, whitespace and all.
     * </td>
     * </tr>
     * <tr valign="top">
     * <td>
     * TRIM
     * </td>
     * <td>
     * Same as TRIM_FULL_WHITE, plus leading/trailing whitespace are
     * trimmed.
     * </td>
     * </tr>
     * <tr valign="top">
     * <td>
     * NORMALIZE
     * </td>
     * <td>
     * Same as TRIM, plus additionally interior whitespace is compressed
     * to a single space.
     * </td>
     * </tr>
     * </table>
     *
     * <p>In most cases textual content is aligned with the surrounding tags
     * (after the appropriate text mode is applied). In the case where the only
     * content between the start and end tags is textual, the start tag, text,
     * and end tag are all printed on the same line. If the document being
     * output already has whitespace, it's wise to turn on TRIM mode so the
     * pre-existing whitespace can be trimmed before adding new whitespace.</p>
     *
     * <p>When an element has an xml:space attribute with the value of "preserve",
     * all formatting is turned off and reverts back to the default until the
     * element and its contents have been printed. If a nested element contains
     * another xml:space with the value "default" formatting is turned back on
     * for the child element and then off for the remainder of the parent
     * element.</p>
     */
    public static class TextMode
    {
        /** Mode for literal text preservation. */
        public static final TextMode PRESERVE = new TextMode( "PRESERVE" );

        /** Mode for text trimming (left and right trim). */
        public static final TextMode TRIM = new TextMode( "TRIM" );

        /**
         * Mode for text normalization (left and right trim plus internal
         * whitespace is normalized to a single space).
         *
         * @see org.jdom.Element#getTextNormalize
         */
        public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" );

        /**
         * Mode for text trimming of content consisting of nothing but
         * whitespace but otherwise not changing output.
         */
        public static final TextMode TRIM_FULL_WHITE =
            new TextMode( "TRIM_FULL_WHITE" );

        /** Name of this mode, as returned by {@link #toString()}. */
        private final String name;

        /** Only the constants declared in this class may be created. */
        private TextMode( String name )
        {
            this.name = name;
        }

        /**
         * Returns the name of this mode.
         *
         * @return the mode name, e.g. "TRIM"
         */
        @Override
        public String toString()
        {
            return name;
        }
    }
}