blob: 42e3281b5d503974a885496eeb6ae378fefc500b [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
// Aug 25, 2000:
// Fixed processing instruction printing inside element content
// to not escape content. Reported by Mikael Staldal
// <d96-mst@d.kth.se>
// Aug 25, 2000:
// Added ability to omit comments.
// Contributed by Anupam Bagchi <abagchi@jtcsv.com>
// Aug 26, 2000:
// Fixed bug in newline handling when preserving spaces.
// Contributed by Mike Dusseault <mdusseault@home.com>
// Aug 29, 2000:
// Fixed state.unescaped not being set to false when
// entering element state.
// Reported by Lowell Vaughn <lvaughn@agillion.com>
package org.apache.xml.serialize;
import java.io.Writer;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Vector;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.w3c.dom.*;
import org.xml.sax.DocumentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.ext.DeclHandler;
/**
* Base class for a serializer supporting both DOM and SAX pretty
* serializing of XML/HTML/XHTML documents. Derived classes perform
* the method-specific serializing, this class provides the common
* serializing mechanisms.
* <p>
* The serializer must be initialized with the proper writer and
* output format before it can be used by calling {@link #init}.
* The serializer can be reused any number of times, but cannot
* be used concurrently by two threads.
* <p>
* If an output stream is used, the encoding is taken from the
* output format (defaults to <tt>UTF-8</tt>). If a writer is
* used, make sure the writer uses the same encoding (if applies)
* as specified in the output format.
* <p>
* The serializer supports both DOM and SAX. DOM serializing is done
* by calling {@link #serialize} and SAX serializing is done by firing
* SAX events and using the serializer as a document handler.
* This also applies to derived class.
* <p>
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's {@link
* org.xml.sax.DocumentHandler#endDocument}).
* <p>
* For elements that are not specified as whitespace preserving,
* the serializer will potentially break long text lines at space
* boundaries, indent lines, and serialize elements on separate
* lines. Line terminators will be regarded as spaces, and
* spaces at beginning of line will be stripped.
* <p>
* When indenting, the serializer is capable of detecting seemingly
* element content, and serializing these elements indented on separate
* lines. An element is serialized indented when it is the first or
* last child of an element, or immediate following or preceding
* another element.
*
*
* @version $Revision$ $Date$
* @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
* @see Serializer
* @see DOMSerializer
*/
public abstract class BaseMarkupSerializer
implements ContentHandler, DocumentHandler, LexicalHandler,
DTDHandler, DeclHandler, DOMSerializer, Serializer
{
/**
 * Identifies the last printable character in the Unicode range
 * that is supported by the encoding used with this serializer.
 * For 8-bit encodings this will be either 0x7E or 0xFF.
 * For 16-bit encodings this will be 0xFFFF. Characters that are
 * not printable will be escaped using character references.
 * Recomputed from the output format's encoding each time the
 * serializer is prepared.
 */
private int _lastPrintable = 0x7E;
/**
 * Holds array of all element states that have been entered.
 * The array is automatically resized. When leaving an element,
 * its state is not removed but reused when later returning
 * to the same nesting level.
 */
private ElementState[] _elementStates;
/**
 * The index of the next state to place in the array,
 * or one plus the index of the current state. When zero,
 * we are in no state.
 */
private int _elementStateCount;
/**
 * Vector holding the textual form of comments and PIs that were
 * reported while in the document state (before or after the root
 * element). Created lazily; flushed by {@link #serializePreRoot}.
 */
private Vector _preRoot;
/**
 * If the document has been started (header serialized), this
 * flag is set to true so it's not started twice.
 */
protected boolean _started;
/**
 * True if the serializer has been prepared. This flag is set
 * to false when the serializer is reset prior to using it,
 * and to true after it has been prepared for usage.
 */
private boolean _prepared;
/**
 * Association between namespace URIs (keys) and prefixes (values).
 * Accumulated here prior to starting an element and placing this
 * list in the element state.
 */
protected Hashtable _prefixes;
/**
 * The public identifier of the document type, if known.
 */
protected String _docTypePublicId;
/**
 * The system identifier of the document type, if known.
 */
protected String _docTypeSystemId;
/**
 * The output format associated with this serializer. This will never
 * be a null reference. If no format was passed to the constructor,
 * the default one for this document type will be used. The format
 * object is never changed by the serializer.
 */
protected OutputFormat _format;
/**
 * The printer used for printing text parts. Created anew each time
 * the serializer is prepared.
 */
protected Printer _printer;
/**
 * True if indenting printer.
 */
protected boolean _indenting;
/**
 * The underlying writer. Either supplied directly, or constructed
 * around {@link #_output} when the serializer is prepared.
 */
private Writer _writer;
/**
 * The output stream, or null when a writer was supplied instead.
 */
private OutputStream _output;
//--------------------------------//
// Constructor and initialization //
//--------------------------------//
/**
 * Creates the serializer with the given output format and allocates
 * the first ten element state slots. Accessible to derived classes only.
 * An output stream or writer must still be supplied before any
 * document is serialized.
 *
 * @param format The output format to use
 */
protected BaseMarkupSerializer( OutputFormat format )
{
    ElementState[] states = new ElementState[ 10 ];

    // Pre-populate every slot so states can be reused without allocation.
    for ( int slot = 0 ; slot < states.length ; ++slot )
        states[ slot ] = new ElementState();
    _elementStates = states;
    _format = format;
}
/**
 * Prepares the serializer for use and returns it as a SAX 1
 * document handler. The returned object is this serializer.
 *
 * @return This serializer
 * @throws IOException Propagated from {@link #prepare}
 */
public DocumentHandler asDocumentHandler()
throws IOException
{
prepare();
return this;
}
/**
 * Prepares the serializer for use and returns it as a SAX 2
 * content handler. The returned object is this serializer.
 *
 * @return This serializer
 * @throws IOException Propagated from {@link #prepare}
 */
public ContentHandler asContentHandler()
throws IOException
{
prepare();
return this;
}
/**
 * Prepares the serializer for use and returns it as a DOM
 * serializer. The returned object is this serializer.
 *
 * @return This serializer
 * @throws IOException Propagated from {@link #prepare}
 */
public DOMSerializer asDOMSerializer()
throws IOException
{
prepare();
return this;
}
/**
 * Directs serialized output to the given byte stream. Any previously
 * set writer is discarded; a new writer is built around the stream
 * the next time the serializer is prepared. The serializer is reset.
 *
 * @param output The output stream to use, must not be null
 * @throws NullPointerException If <tt>output</tt> is null
 * @throws IllegalStateException If {@link #reset} refuses because
 *  serialization is in progress
 */
public void setOutputByteStream( OutputStream output )
{
    if ( output == null )
        throw new NullPointerException( "SER001 Argument 'output' is null." );
    _output = output;
    _writer = null;
    reset();
}
/**
 * Directs serialized output to the given writer. Any previously
 * set output stream is discarded and the serializer is reset.
 *
 * @param writer The writer to use, must not be null
 * @throws NullPointerException If <tt>writer</tt> is null
 */
public void setOutputCharStream( Writer writer )
{
    if ( writer == null ) {
        throw new NullPointerException( "SER001 Argument 'writer' is null." );
    }
    // A character stream replaces any byte stream set earlier.
    _output = null;
    _writer = writer;
    reset();
}
/**
 * Replaces the output format used by this serializer and resets
 * the serializer so the new format takes effect on next use.
 *
 * @param format The output format to use, must not be null
 * @throws NullPointerException If <tt>format</tt> is null
 */
public void setOutputFormat( OutputFormat format )
{
    if ( format == null ) {
        throw new NullPointerException( "SER001 Argument 'format' is null." );
    }
    _format = format;
    reset();
}
/**
 * Marks the serializer as unprepared so that the next use goes
 * through {@link #prepare} again.
 *
 * @return Always true
 * @throws IllegalStateException If the element state count is
 *  greater than one, i.e. serialization is under way
 */
public boolean reset()
{
    boolean midDocument = _elementStateCount > 1;

    if ( midDocument ) {
        throw new IllegalStateException( "Serializer reset in the middle of serialization" );
    }
    _prepared = false;
    return true;
}
/**
 * Readies the serializer for a new document, unless it is already
 * prepared. Builds the writer from the output stream (if one was set),
 * determines the last printable character for the encoding, creates
 * the printer, and re-initializes the root element state and the
 * document type identifiers from the output format.
 *
 * @throws IOException If neither a writer nor an output stream was
 *  supplied, or if creating the writer fails
 */
protected void prepare()
    throws IOException
{
    ElementState root;

    if ( _prepared )
        return;

    if ( _writer == null && _output == null )
        throw new IOException( "SER002 No writer supplied for serializer" );

    // An output stream always gets a fresh writer: the serializer may
    // be reused with the same stream but a different encoding.
    if ( _output != null ) {
        _writer = ( _format.getEncoding() == null )
            ? new OutputStreamWriter( _output )
            : Encodings.getWriter( _output, _format.getEncoding() );
    }

    // Characters above this value are written as character references.
    _lastPrintable = ( _format.getEncoding() == null )
        ? Encodings.getLastPrintable()
        : Encodings.getLastPrintable( _format.getEncoding() );

    // Pick the printer implementation according to the indenting option.
    if ( _format.getIndenting() ) {
        _indenting = true;
        _printer = new IndentPrinter( _writer, _format );
    } else {
        _indenting = false;
        _printer = new Printer( _writer, _format );
    }

    // Slot zero represents the document level; wipe it clean.
    _elementStateCount = 0;
    root = _elementStates[ 0 ];
    root.namespaceURI = null;
    root.localName = null;
    root.rawName = null;
    root.preserveSpace = _format.getPreserveSpace();
    root.empty = true;
    root.afterElement = false;
    root.inCData = false;
    root.doCData = false;
    root.prefixes = null;

    _docTypePublicId = _format.getDoctypePublic();
    _docTypeSystemId = _format.getDoctypeSystem();
    _started = false;
    _prepared = true;
}
//----------------------------------//
// DOM document serializing methods //
//----------------------------------//
/**
 * Serializes a DOM element to the configured output using the
 * configured output format. I/O failures recorded by the printer
 * are thrown once serializing has finished and output was flushed.
 *
 * @param elem The element to serialize
 * @throws IOException An I/O exception occurred while
 *  serializing
 */
public void serialize( Element elem )
    throws IOException
{
    IOException failure;

    prepare();
    serializeNode( elem );
    _printer.flush();
    failure = _printer.getException();
    if ( failure != null ) {
        throw failure;
    }
}
/**
 * Serializes a DOM document fragment to the configured output using
 * the configured output format. I/O failures recorded by the printer
 * are thrown once serializing has finished and output was flushed.
 *
 * @param frag The document fragment to serialize
 * @throws IOException An I/O exception occurred while
 *  serializing
 */
public void serialize( DocumentFragment frag )
    throws IOException
{
    IOException failure;

    prepare();
    serializeNode( frag );
    _printer.flush();
    failure = _printer.getException();
    if ( failure != null ) {
        throw failure;
    }
}
/**
 * Serializes a DOM document to the configured output using the
 * configured output format. After the document's nodes, any comments
 * and PIs still held in the pre-root list are written. I/O failures
 * recorded by the printer are thrown once output was flushed.
 *
 * @param doc The document to serialize
 * @throws IOException An I/O exception occurred while
 *  serializing
 */
public void serialize( Document doc )
    throws IOException
{
    IOException failure;

    prepare();
    serializeNode( doc );
    serializePreRoot();
    _printer.flush();
    failure = _printer.getException();
    if ( failure != null ) {
        throw failure;
    }
}
//------------------------------------------//
// SAX document handler serializing methods //
//------------------------------------------//
/**
 * SAX start-of-document event. Prepares the serializer; nothing is
 * written to the output by this method itself.
 *
 * @throws SAXException If preparing the serializer fails with an I/O
 *  exception; the I/O exception is available as the embedded exception
 */
public void startDocument()
    throws SAXException
{
    try {
        prepare();
    } catch ( IOException except ) {
        // Keep the original message text but also chain the cause so
        // callers can reach the underlying I/O exception.
        throw new SAXException( except.toString(), except );
    }
}
/**
 * SAX character data event. Prints the given slice of the array as
 * text content of the current element. If the element state asks for
 * CDATA (or a CDATA section is already open) the text is written
 * unescaped inside a CDATA section, and every occurrence of
 * <tt>]]&gt;</tt> is split across two sections. Otherwise the text is
 * printed escaped or unescaped, and space preserving or not,
 * according to the element state.
 *
 * @param chars Array holding the characters
 * @param start Index of the first character to print
 * @param length Number of characters to print
 */
public void characters( char[] chars, int start, int length )
{
    ElementState state;
    int saveIndent;

    state = content();
    if ( state.inCData || state.doCData ) {
        int end = start + length;
        int segment = start;
        int index = start;

        // Open the CDATA section if this is the first text in it.
        if ( ! state.inCData ) {
            _printer.printText( "<![CDATA[" );
            state.inCData = true;
        }
        // CDATA content is treated as space preserving: no indentation.
        saveIndent = _printer.getNextIndent();
        _printer.setNextIndent( 0 );
        // Scan using absolute array positions. On each ']]>' emit
        // everything up to and including ']]', close and reopen the
        // section, and resume at the '>' which starts the next segment.
        while ( index + 2 < end ) {
            if ( chars[ index ] == ']' && chars[ index + 1 ] == ']' &&
                 chars[ index + 2 ] == '>' ) {
                printText( chars, segment, index + 2 - segment, true, true );
                _printer.printText( "]]><![CDATA[" );
                segment = index + 2;
                index = segment;
            } else {
                ++index;
            }
        }
        if ( end > segment )
            printText( chars, segment, end - segment, true, true );
        _printer.setNextIndent( saveIndent );
    } else if ( state.preserveSpace ) {
        // If preserving space then hold off indentation so no
        // excessive spaces are printed at line breaks, and print
        // the text breaking only at line breaks.
        saveIndent = _printer.getNextIndent();
        _printer.setNextIndent( 0 );
        printText( chars, start, length, true, state.unescaped );
        _printer.setNextIndent( saveIndent );
    } else {
        printText( chars, start, length, false, state.unescaped );
    }
}
/**
 * SAX ignorable whitespace event. The whitespace is only written when
 * indenting; in that case the pending indentation for this line is
 * cancelled so the output is not indented twice.
 *
 * @param chars Array holding the whitespace characters
 * @param start Index of the first character
 * @param length Number of characters
 */
public void ignorableWhitespace( char[] chars, int start, int length )
{
    content();
    if ( ! _indenting )
        return;

    _printer.setThisIndent( 0 );
    int end = start + length;
    for ( int pos = start ; pos < end ; ++pos ) {
        _printer.printText( chars[ pos ] );
    }
}
/**
 * SAX processing instruction event. Builds the textual form of the
 * PI, truncating target and code at the first <tt>?&gt;</tt> they
 * contain. In the document state the PI is queued in the pre-root
 * list, otherwise it is printed unescaped and space preserving at
 * one extra level of indentation.
 *
 * @param target The PI target
 * @param code The PI data, may be null
 */
public void processingInstruction( String target, String code )
{
    StringBuffer markup;
    String rendered;
    int cut;

    content();
    markup = new StringBuffer( 40 );

    // Neither part may contain the PI terminator.
    cut = target.indexOf( "?>" );
    markup.append( "<?" ).append( cut >= 0 ? target.substring( 0, cut ) : target );
    if ( code != null ) {
        markup.append( ' ' );
        cut = code.indexOf( "?>" );
        markup.append( cut >= 0 ? code.substring( 0, cut ) : code );
    }
    markup.append( "?>" );
    rendered = markup.toString();

    if ( ! isDocumentState() ) {
        _printer.indent();
        printText( rendered, true, true );
        _printer.unindent();
        return;
    }
    // Outside the root element: hold the PI until the pre-root
    // list is serialized.
    if ( _preRoot == null )
        _preRoot = new Vector();
    _preRoot.addElement( rendered );
}
/**
 * SAX lexical handler comment event. Converts the array slice to a
 * string and delegates to {@link #comment(String)}.
 *
 * @param chars Array holding the comment text
 * @param start Index of the first character
 * @param length Number of characters
 */
public void comment( char[] chars, int start, int length )
{
comment( new String( chars, start, length ) );
}
/**
 * Prints a comment, unless the output format asks for comments to be
 * omitted. The text is truncated at the first <tt>--&gt;</tt> it
 * contains. In the document state the comment is queued in the
 * pre-root list, otherwise it is printed unescaped at one extra
 * level of indentation.
 *
 * @param text The comment text
 */
public void comment( String text )
{
    StringBuffer markup;
    String rendered;
    String body;
    int terminator;

    if ( _format.getOmitComments() )
        return;
    content();
    markup = new StringBuffer( 40 );

    // The comment terminator may not appear inside the comment.
    terminator = text.indexOf( "-->" );
    body = ( terminator >= 0 ) ? text.substring( 0, terminator ) : text;
    markup.append( "<!--" ).append( body ).append( "-->" );
    rendered = markup.toString();

    if ( ! isDocumentState() ) {
        _printer.indent();
        printText( rendered, false, true );
        _printer.unindent();
        return;
    }
    // Outside the root element: hold the comment until the pre-root
    // list is serialized.
    if ( _preRoot == null )
        _preRoot = new Vector();
    _preRoot.addElement( rendered );
}
/**
 * Marks the current element state so that following character data
 * is printed inside a CDATA section.
 */
public void startCDATA()
{
    getElementState().doCData = true;
}
/**
 * Clears the CDATA request on the current element state. An open
 * CDATA section is closed later, when further content is printed.
 */
public void endCDATA()
{
    getElementState().doCData = false;
}
/**
 * Marks the current element state so that following character data
 * is printed without escaping.
 */
public void startNonEscaping()
{
    getElementState().unescaped = true;
}
/**
 * Marks the current element state so that following character data
 * is escaped again.
 */
public void endNonEscaping()
{
    getElementState().unescaped = false;
}
/**
 * Turns on space preserving for the current element state.
 */
public void startPreserving()
{
    getElementState().preserveSpace = true;
}
/**
 * Turns off space preserving for the current element state.
 */
public void endPreserving()
{
    getElementState().preserveSpace = false;
}
/**
 * SAX end-of-document event. Writes whatever is still held in the
 * pre-root list, flushes the printer and reports any I/O exception
 * the printer recorded while serializing.
 *
 * @throws SAXException Wraps an I/O exception that occurred during
 *  serializing
 */
public void endDocument()
    throws SAXException
{
    IOException failure;

    // Comments and PIs collected outside the root element go out first.
    serializePreRoot();
    // Buffered output must be pushed to the underlying writer.
    _printer.flush();
    // Errors are deferred until now so serializing is never interrupted.
    failure = _printer.getException();
    if ( failure != null ) {
        throw new SAXException( failure );
    }
}
/**
 * SAX lexical handler start-of-entity event. Currently ignored.
 *
 * @param name The entity name
 */
public void startEntity( String name )
{
// ???
}
/**
 * SAX lexical handler end-of-entity event. Currently ignored.
 *
 * @param name The entity name
 */
public void endEntity( String name )
{
// ???
}
/**
 * SAX document locator callback. The locator is not used by
 * the serializer.
 *
 * @param locator The document locator (ignored)
 */
public void setDocumentLocator( Locator locator )
{
// Nothing to do
}
//-----------------------------------------//
// SAX content handler serializing methods //
//-----------------------------------------//
/**
 * SAX skipped entity event. Ends any requested CDATA mode, then
 * prints the entity as a reference of the form <tt>&amp;name;</tt>.
 *
 * @param name The name of the skipped entity
 * @throws SAXException Declared by the SAX interface; not thrown
 *  by the statements in this method
 */
public void skippedEntity ( String name )
throws SAXException
{
// An entity reference cannot live inside a CDATA section; content()
// closes a section that is still open once doCData is cleared.
endCDATA();
content();
_printer.printText( '&' );
_printer.printText( name );
_printer.printText( ';' );
}
/**
 * SAX start-of-prefix-mapping event. Records the mapping from the
 * namespace URI to the prefix in the pending prefix table, creating
 * the table on first use. A null prefix is stored as the empty string.
 *
 * @param prefix The namespace prefix, may be null
 * @param uri The namespace URI, used as the table key
 * @throws SAXException Declared by the SAX interface
 */
public void startPrefixMapping( String prefix, String uri )
    throws SAXException
{
    String stored;

    if ( _prefixes == null ) {
        _prefixes = new Hashtable();
    }
    stored = ( prefix != null ) ? prefix : "";
    _prefixes.put( uri, stored );
}
/**
 * SAX end-of-prefix-mapping event. Nothing is done here.
 *
 * @param prefix The namespace prefix (ignored)
 * @throws SAXException Declared by the SAX interface
 */
public void endPrefixMapping( String prefix )
throws SAXException
{
}
//------------------------------------------//
// SAX DTD/Decl handler serializing methods //
//------------------------------------------//
/**
 * SAX lexical handler start-of-DTD event. Switches the printer into
 * DTD mode and remembers the public and system identifiers. The
 * <tt>name</tt> argument is not used here.
 *
 * @param name The document type name (unused)
 * @param publicId The document type public identifier, may be null
 * @param systemId The document type system identifier, may be null
 */
public void startDTD( String name, String publicId, String systemId )
{
_printer.enterDTD();
_docTypePublicId = publicId;
_docTypeSystemId = systemId;
}
/**
 * SAX lexical handler end-of-DTD event. Nothing is done here.
 */
public void endDTD()
{
// Nothing to do here; the DTD text is not written out by this method.
}
/**
 * SAX declaration handler element declaration event. Prints
 * <tt>&lt;!ELEMENT name model&gt;</tt> in DTD mode, followed by a
 * line break when indenting.
 *
 * @param name The element type name
 * @param model The content model, printed as given
 */
public void elementDecl( String name, String model )
{
_printer.enterDTD();
_printer.printText( "<!ELEMENT " );
_printer.printText( name );
_printer.printText( ' ' );
_printer.printText( model );
_printer.printText( '>' );
if ( _indenting )
_printer.breakLine();
}
/**
 * SAX declaration handler attribute declaration event. Prints
 * <tt>&lt;!ATTLIST eName aName type [valueDefault] ["value"]&gt;</tt>
 * in DTD mode, followed by a line break when indenting.
 *
 * @param eName The element name
 * @param aName The attribute name
 * @param type The attribute type, printed as given
 * @param valueDefault The default keyword, omitted when null
 * @param value The default value, omitted when null; printed escaped
 *  inside double quotes
 */
public void attributeDecl( String eName, String aName, String type,
String valueDefault, String value )
{
_printer.enterDTD();
_printer.printText( "<!ATTLIST " );
_printer.printText( eName );
_printer.printText( ' ' );
_printer.printText( aName );
_printer.printText( ' ' );
_printer.printText( type );
if ( valueDefault != null ) {
_printer.printText( ' ' );
_printer.printText( valueDefault );
}
if ( value != null ) {
_printer.printText( " \"" );
printEscaped( value );
_printer.printText( '"' );
}
_printer.printText( '>' );
if ( _indenting )
_printer.breakLine();
}
/**
 * SAX declaration handler internal entity declaration event. Prints
 * <tt>&lt;!ENTITY name "value"&gt;</tt> in DTD mode with the value
 * escaped, followed by a line break when indenting.
 *
 * @param name The entity name
 * @param value The replacement text
 */
public void internalEntityDecl( String name, String value )
{
_printer.enterDTD();
_printer.printText( "<!ENTITY " );
_printer.printText( name );
_printer.printText( " \"" );
printEscaped( value );
_printer.printText( "\">" );
if ( _indenting )
_printer.breakLine();
}
/**
 * SAX declaration handler external entity declaration event.
 * Delegates to {@link #unparsedEntityDecl} with no notation name.
 *
 * @param name The entity name
 * @param publicId The public identifier, may be null
 * @param systemId The system identifier
 */
public void externalEntityDecl( String name, String publicId, String systemId )
{
_printer.enterDTD();
unparsedEntityDecl( name, publicId, systemId, null );
}
/**
 * SAX DTD handler unparsed entity declaration event. Prints an
 * <tt>&lt;!ENTITY&gt;</tt> declaration in DTD mode, using the PUBLIC
 * form when a public identifier is given and the SYSTEM form
 * otherwise, with an optional NDATA part. A line break follows
 * when indenting.
 *
 * @param name The entity name
 * @param publicId The public identifier, may be null
 * @param systemId The system identifier
 * @param notationName The notation name, may be null
 */
public void unparsedEntityDecl( String name, String publicId,
                                String systemId, String notationName )
{
    _printer.enterDTD();

    // Both forms open the same way and end with the system identifier.
    _printer.printText( "<!ENTITY " );
    _printer.printText( name );
    if ( publicId != null ) {
        _printer.printText( " PUBLIC " );
        printDoctypeURL( publicId );
        _printer.printText( ' ' );
    } else {
        _printer.printText( " SYSTEM " );
    }
    printDoctypeURL( systemId );

    if ( notationName != null ) {
        _printer.printText( " NDATA " );
        _printer.printText( notationName );
    }
    _printer.printText( '>' );
    if ( _indenting ) {
        _printer.breakLine();
    }
}
/**
 * SAX DTD handler notation declaration event. Prints a
 * <tt>&lt;!NOTATION&gt;</tt> declaration in DTD mode. With a public
 * identifier the PUBLIC form is used and the system identifier is
 * added only if it is not null; without one the SYSTEM form is used.
 * A line break follows when indenting.
 *
 * @param name The notation name
 * @param publicId The public identifier, may be null
 * @param systemId The system identifier
 */
public void notationDecl( String name, String publicId, String systemId )
{
    _printer.enterDTD();

    _printer.printText( "<!NOTATION " );
    _printer.printText( name );
    if ( publicId == null ) {
        _printer.printText( " SYSTEM " );
        printDoctypeURL( systemId );
    } else {
        _printer.printText( " PUBLIC " );
        printDoctypeURL( publicId );
        if ( systemId != null ) {
            _printer.printText( ' ' );
            printDoctypeURL( systemId );
        }
    }

    _printer.printText( '>' );
    if ( _indenting ) {
        _printer.breakLine();
    }
}
//------------------------------------------//
// Generic node serializing methods         //
//------------------------------------------//
/**
 * Serialize the DOM node. This method is shared across XML, HTML and XHTML
 * serializers and the differences are masked out in a separate {@link
 * #serializeElement}.
 * <p>
 * Text, CDATA, comment and processing instruction nodes are routed to
 * the corresponding event methods of this class; entity references,
 * documents and document fragments are handled by recursing into
 * their children. Node types not listed in the switch are ignored.
 *
 * @param node The node to serialize
 * @see #serializeElement
 */
protected void serializeNode( Node node )
{
// Based on the node type call the suitable SAX handler.
// Only comments entities and documents which are not
// handled by SAX are serialized directly.
switch ( node.getNodeType() ) {
case Node.TEXT_NODE : {
String text;
// Text nodes with a null value are skipped.
text = node.getNodeValue();
if ( text != null )
characters( node.getNodeValue() );
break;
}
case Node.CDATA_SECTION_NODE : {
String text;
text = node.getNodeValue();
if ( text != null ) {
// Bracket the text with CDATA mode so characters() prints it
// inside a CDATA section.
startCDATA();
characters( node.getNodeValue() );
endCDATA();
}
break;
}
case Node.COMMENT_NODE : {
String text;
// Skip even fetching the value when comments are omitted.
if ( ! _format.getOmitComments() ) {
text = node.getNodeValue();
if ( text != null )
comment( node.getNodeValue() );
}
break;
}
case Node.ENTITY_REFERENCE_NODE : {
Node child;
// The reference itself is not printed; its children are
// serialized in its place, outside of any CDATA section.
endCDATA();
content();
child = node.getFirstChild();
while ( child != null ) {
serializeNode( child );
child = child.getNextSibling();
}
break;
}
case Node.PROCESSING_INSTRUCTION_NODE :
processingInstruction( node.getNodeName(), node.getNodeValue() );
break;
case Node.ELEMENT_NODE :
serializeElement( (Element) node );
break;
case Node.DOCUMENT_NODE : {
DocumentType docType;
DOMImplementation domImpl;
// NOTE(review): map, entity, notation and i are declared but not
// used anywhere in this method.
NamedNodeMap map;
Entity entity;
Notation notation;
int i;
// If there is a document type, use the SAX events to
// serialize it.
docType = ( (Document) node ).getDoctype();
if (docType != null) {
// DOM Level 2 (or higher)
// NOTE(review): domImpl is assigned here but not read afterwards.
domImpl = ( (Document) node ).getImplementation();
try {
String internal;
startDTD( docType.getName(), docType.getPublicId(), docType.getSystemId() );
// The internal subset, if any, is printed verbatim.
internal = docType.getInternalSubset();
if ( internal != null && internal.length() > 0 )
_printer.printText( internal );
endDTD();
}
// DOM Level 1 -- does implementation have methods?
// The interface methods called above may be missing at run time;
// look for same-named methods on the concrete class by reflection.
catch (NoSuchMethodError nsme) {
Class docTypeClass = docType.getClass();
String docTypePublicId = null;
String docTypeSystemId = null;
try {
java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", null);
if (getPublicId.getReturnType().equals(String.class)) {
docTypePublicId = (String)getPublicId.invoke(docType, null);
}
}
catch (Exception e) {
// ignore: the public identifier simply stays null
}
try {
java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", null);
if (getSystemId.getReturnType().equals(String.class)) {
docTypeSystemId = (String)getSystemId.invoke(docType, null);
}
}
catch (Exception e) {
// ignore: the system identifier simply stays null
}
startDTD( docType.getName(), docTypePublicId, docTypeSystemId);
endDTD();
}
}
// !! Fall through: the document's children are serialized by the
// document fragment case below.
}
case Node.DOCUMENT_FRAGMENT_NODE : {
Node child;
// Reached for document fragments directly and for documents by
// falling through from the case above. Just serialize the
// children in document order.
child = node.getFirstChild();
while ( child != null ) {
serializeNode( child );
child = child.getNextSibling();
}
break;
}
default:
// Any other node type is silently skipped.
break;
}
}
/**
 * Must be called by a method about to print any type of content.
 * Outside the document state this closes a CDATA section that is
 * open but no longer requested, closes the opening tag if the
 * element was still empty, and clears the <tt>afterElement</tt> flag.
 * In the document state the current state is returned untouched.
 *
 * @return The current element state
 */
protected ElementState content()
{
    ElementState current = getElementState();

    if ( isDocumentState() )
        return current;

    // A CDATA section left open must be terminated before anything
    // else is written.
    if ( current.inCData && ! current.doCData ) {
        _printer.printText( "]]>" );
        current.inCData = false;
    }
    // First content inside the element: finish the opening tag
    // and remember the element is no longer empty.
    if ( current.empty ) {
        _printer.printText( '>' );
        current.empty = false;
    }
    // Whatever is printed next is, by default, not directly after
    // an element; the one exception sets the flag itself.
    current.afterElement = false;
    return current;
}
/**
 * Called to print the text contents in the prevailing element format.
 * Since this method is capable of printing text as CDATA, it is used
 * for that purpose as well. Whether the text is printed as CDATA,
 * unescaped, or space preserving is determined by the current
 * element state.
 *
 * @param text The text to print
 */
protected void characters( String text )
{
    ElementState state = content();
    int saveIndent;

    if ( state.inCData || state.doCData ) {
        // CDATA output: the text is not escaped, but each ']]>' must be
        // split so that ']]' ends one section and '>' starts the next.
        StringBuffer buffer = new StringBuffer( text.length() );
        int from = 0;
        int hit;

        if ( ! state.inCData ) {
            buffer.append( "<![CDATA[" );
            state.inCData = true;
        }
        hit = text.indexOf( "]]>", from );
        while ( hit >= 0 ) {
            buffer.append( text.substring( from, hit + 2 ) ).append( "]]><![CDATA[" );
            from = hit + 2;
            hit = text.indexOf( "]]>", from );
        }
        buffer.append( text.substring( from ) );

        // CDATA content is treated as space preserving.
        saveIndent = _printer.getNextIndent();
        _printer.setNextIndent( 0 );
        printText( buffer.toString(), true, true );
        _printer.setNextIndent( saveIndent );
        return;
    }

    if ( ! state.preserveSpace ) {
        printText( text, false, state.unescaped );
        return;
    }
    // Space preserving: suspend indentation so no extra spaces are
    // printed at line breaks, and break only where the text does.
    saveIndent = _printer.getNextIndent();
    _printer.setNextIndent( 0 );
    printText( text, true, state.unescaped );
    _printer.setNextIndent( saveIndent );
}
/**
 * Returns the name of the entity reference suitable for this character
 * value, or null if no such entity exists. The name is returned without
 * the leading <tt>'&amp;'</tt> and trailing <tt>';'</tt>, which the
 * caller adds when printing.
 *
 * @param ch Character value
 * @return Character entity name, or null
 */
protected abstract String getEntityRef( char ch );
/**
 * Called to serialize the DOM element. The element is serialized based on
 * the serializer's method (XML, HTML, XHTML).
 *
 * @param elem The element to serialize
 */
protected abstract void serializeElement( Element elem );
/**
 * Writes out the comments and PIs that were collected in the pre-root
 * list, one per line, and then empties the list. Such items are held
 * back because the document type, which generally comes first, is
 * serialized together with the root element. Does nothing if the
 * list was never created.
 */
protected void serializePreRoot()
{
    if ( _preRoot == null )
        return;

    int next = 0;
    while ( next < _preRoot.size() ) {
        // Entries are already complete markup: print them verbatim.
        printText( (String) _preRoot.elementAt( next ), true, true );
        _printer.breakLine();
        ++next;
    }
    _preRoot.removeAllElements();
}
//---------------------------------------------//
// Text pretty printing and formatting methods //
//---------------------------------------------//
/**
 * Prints a slice of a character array with whitespace handling.
 * When preserving space every character is printed as is (escaped
 * unless <tt>unescaped</tt> is set; line terminators are never
 * escaped). When not preserving space, each space, form feed, tab or
 * line terminator is handed to the printer as a breakable space and
 * all other characters are printed escaped or unescaped.
 *
 * @param chars Array holding the characters
 * @param start Index of the first character to print
 * @param length Number of characters to print
 * @param preserveSpace Space preserving flag
 * @param unescaped Print unescaped
 */
protected final void printText( char[] chars, int start, int length,
                                boolean preserveSpace, boolean unescaped )
{
    int end = start + length;

    if ( preserveSpace ) {
        // Print the text exactly as given; nothing is collapsed.
        for ( int pos = start ; pos < end ; ++pos ) {
            char ch = chars[ pos ];
            if ( unescaped || ch == '\n' || ch == '\r' )
                _printer.printText( ch );
            else
                printEscaped( ch );
        }
        return;
    }

    // Whitespace of any kind becomes a potential line break point.
    for ( int pos = start ; pos < end ; ++pos ) {
        char ch = chars[ pos ];
        switch ( ch ) {
        case ' ' :
        case '\f' :
        case '\t' :
        case '\n' :
        case '\r' :
            _printer.printSpace();
            break;
        default :
            if ( unescaped )
                _printer.printText( ch );
            else
                printEscaped( ch );
            break;
        }
    }
}
/**
 * Prints a string with whitespace handling. When preserving space
 * every character is printed as is (escaped unless <tt>unescaped</tt>
 * is set; line terminators are never escaped). When not preserving
 * space, each space, form feed, tab or line terminator is handed to
 * the printer as a breakable space and all other characters are
 * printed escaped or unescaped.
 *
 * @param text The text to print
 * @param preserveSpace Space preserving flag
 * @param unescaped Print unescaped
 */
protected final void printText( String text, boolean preserveSpace, boolean unescaped )
{
    int count = text.length();

    if ( preserveSpace ) {
        // Print the text exactly as given; nothing is collapsed.
        for ( int pos = 0 ; pos < count ; ++pos ) {
            char ch = text.charAt( pos );
            if ( unescaped || ch == '\n' || ch == '\r' )
                _printer.printText( ch );
            else
                printEscaped( ch );
        }
        return;
    }

    // Whitespace of any kind becomes a potential line break point.
    for ( int pos = 0 ; pos < count ; ++pos ) {
        char ch = text.charAt( pos );
        switch ( ch ) {
        case ' ' :
        case '\f' :
        case '\t' :
        case '\n' :
        case '\r' :
            _printer.printSpace();
            break;
        default :
            if ( unescaped )
                _printer.printText( ch );
            else
                printEscaped( ch );
            break;
        }
    }
}
/**
 * Print a document type public or system identifier URL.
 * Encapsulates the URL in double quotes and replaces the double
 * quote, control characters below 0x20 and characters above 0x7F
 * with a <tt>%</tt> escape holding the character value in hex.
 * Values below 0x10 are zero-padded so that the escape always has at
 * least two hex digits. Characters above 0xFF still produce more
 * than two digits; they are not converted to a byte encoding.
 *
 * @param url The document type url to print, must not be null
 */
protected void printDoctypeURL( String url )
{
    int i;
    char ch;

    _printer.printText( '"' );
    for ( i = 0 ; i < url.length() ; ++i ) {
        ch = url.charAt( i );
        if ( ch == '"' || ch < 0x20 || ch > 0x7F ) {
            _printer.printText( '%' );
            // A percent escape needs two hex digits; toHexString
            // yields only one for values below 0x10.
            if ( ch < 0x10 )
                _printer.printText( '0' );
            _printer.printText( Integer.toHexString( ch ) );
        } else
            _printer.printText( ch );
    }
    _printer.printText( '"' );
}
/**
 * Prints a single character in escaped form. If an entity reference
 * exists for the character it is used. Otherwise the character is
 * printed as is when it counts as printable, and as a decimal
 * character reference when it does not.
 *
 * @param ch The character to print
 */
protected void printEscaped( char ch )
{
    // A named entity, where the derived class offers one, takes
    // precedence. The available names differ between XML and HTML.
    String entityName = getEntityRef( ch );
    if ( entityName != null ) {
        _printer.printText( '&' );
        _printer.printText( entityName );
        _printer.printText( ';' );
        return;
    }

    // Printable means: from space up to the encoding's last printable
    // character, or one of tab / line feed / carriage return.
    // NOTE(review): 0xF7 is excluded here, while the original comment
    // spoke of ASCII delete, which is 0x7F -- confirm which is intended.
    boolean withinRange = ch >= ' ' && ch <= _lastPrintable && ch != 0xF7;
    boolean tabOrLineEnd = ch == '\n' || ch == '\r' || ch == '\t';

    if ( withinRange || tabOrLineEnd ) {
        _printer.printText( ch );
    } else {
        _printer.printText( "&#" );
        _printer.printText( Integer.toString( ch ) );
        _printer.printText( ';' );
    }
}
/**
 * Escapes a string so it may be printed as text content or attribute
 * value. Each character of the string is handed, in order, to
 * {@link #printEscaped(char)}, which decides whether it is printed
 * as is, as an entity reference (e.g. <tt>&amp;lt;</tt>) or as a
 * character reference.
 *
 * @param source The string to escape
 */
protected void printEscaped( String source )
{
    final int count = source.length();
    int pos = 0;

    while ( pos < count ) {
        printEscaped( source.charAt( pos ) );
        ++pos;
    }
}
//--------------------------------//
// Element state handling methods //
//--------------------------------//
/**
 * Return the state of the current element, that is the entry of the
 * element state array at the current element state count.
 *
 * @return Current element state
 */
protected ElementState getElementState()
{
    final ElementState current = _elementStates[ _elementStateCount ];
    return current;
}
/**
 * Enter a new element state for the specified element.
 * The element names and the space preserving flag are recorded as
 * given; the state starts out empty, not after an element, with
 * CDATA handling and unescaped output switched off. The namespace
 * prefixes currently held by the serializer are moved into the new
 * state, and the array of states grows by ten entries when the next
 * slot would fall outside of it.
 *
 * @param namespaceURI The namespace URI recorded in the state
 * @param localName The local name recorded in the state
 * @param rawName The raw (qualified) name recorded in the state
 * @param preserveSpace True if space is preserved in this element
 * @return The state of the element just entered
 */
protected ElementState enterElementState( String namespaceURI, String localName,
                                          String rawName, boolean preserveSpace )
{
    final int capacity = _elementStates.length;

    if ( _elementStateCount + 1 == capacity ) {
        // Need a larger array of states. This does not happen
        // often, unless the document is really deep.
        final ElementState[] grown = new ElementState[ capacity + 10 ];
        System.arraycopy( _elementStates, 0, grown, 0, capacity );
        // The added slots are filled up front so states can be reused.
        for ( int slot = capacity ; slot < grown.length ; ++slot ) {
            grown[ slot ] = new ElementState();
        }
        _elementStates = grown;
    }

    ++_elementStateCount;
    final ElementState entered = _elementStates[ _elementStateCount ];

    // Identity of the element.
    entered.namespaceURI = namespaceURI;
    entered.localName = localName;
    entered.rawName = rawName;

    // Flags start from their initial values.
    entered.preserveSpace = preserveSpace;
    entered.empty = true;
    entered.afterElement = false;
    entered.inCData = false;
    entered.doCData = false;
    entered.unescaped = false;

    // Hand the collected prefixes over to this element.
    entered.prefixes = _prefixes;
    _prefixes = null;

    return entered;
}
/**
 * Leave the current element state and return to the
 * state of the parent element. If this was the root
 * element, return to the state of the document. The
 * prefixes stored in the state being left become the
 * serializer's current prefixes.
 *
 * @return Previous element state
 * @throws IllegalStateException If there is no element state to leave
 */
protected ElementState leaveElementState()
{
    // Already in the document state: nothing to leave.
    if ( _elementStateCount <= 0 ) {
        throw new IllegalStateException( "Internal error: element state is zero" );
    }

    final ElementState leaving = _elementStates[ _elementStateCount ];
    _prefixes = leaving.prefixes;
    _elementStateCount = _elementStateCount - 1;
    return _elementStates[ _elementStateCount ];
}
/**
 * Returns true if in the state of the document, which is the
 * case whenever the element state count is zero: before any
 * element has been entered and after the root element has
 * been left.
 *
 * @return True if in the state of the document
 */
protected boolean isDocumentState()
{
    final boolean noOpenElement = ( _elementStateCount == 0 );
    return noOpenElement;
}
/**
 * Returns the namespace prefix for the specified URI.
 * The serializer's current prefix table is consulted first, then
 * the prefix tables of the element states, from the current
 * element outwards. The first mapping found wins; if none of the
 * tables maps the URI, returns null.
 *
 * @param namespaceURI The namespace URI
 * @return The namespace prefix if known, or null
 */
protected String getPrefix( String namespaceURI )
{
    // Prefixes currently held by the serializer itself.
    if ( _prefixes != null ) {
        final String pending = (String) _prefixes.get( namespaceURI );
        if ( pending != null ) {
            return pending;
        }
    }

    // Walk the element states from the innermost one outwards. The
    // entry at index zero is never consulted, and in the document
    // state the loop does not run at all.
    int depth = _elementStateCount;
    while ( depth > 0 ) {
        if ( _elementStates[ depth ].prefixes != null ) {
            final String inherited = (String) _elementStates[ depth ].prefixes.get( namespaceURI );
            if ( inherited != null ) {
                return inherited;
            }
        }
        --depth;
    }
    return null;
}
}