/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


// Aug 21, 2000:
//  Added ability to omit DOCTYPE declaration.
//  Reported by Lars Martin <lars@smb-tec.com>
// Aug 25, 2000:
//  Added ability to omit comments.
//  Contributed by Anupam Bagchi <abagchi@jtcsv.com>


package org.apache.xml.serialize;


import java.util.Hashtable;

import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;


/**
 * Specifies an output format to control the serializer. Based on the
 * XSLT specification for output format, plus additional parameters.
 * Used to select the suitable serializer and determine how the
 * document should be formatted on output.
 * <p>
 * The two interesting constructors are:
 * <ul>
 * <li>{@link #OutputFormat(String,String,boolean)} creates a format
 *  for the specified method (XML, HTML, Text, etc), encoding and indentation
 * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
 *  compatible with the document type (XML, HTML, Text, etc), encoding and
 *  indentation
 * </ul>
 *
 *
 * @version $Revision$ $Date$
 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
 *         <a href="mailto:visco@intalio.com">Keith Visco</a>
 * @see Serializer
 * @see Method
 * @see LineSeparator
 */
public class OutputFormat
{


    public static class DTD
    {

        /**
         * Public identifier for HTML document type.
         */
        public static final String HTMLPublicId = "-//W3C//DTD HTML 4.0//EN";

        /**
         * System identifier for HTML document type.
         */
        public static final String HTMLSystemId =
            "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";

        /**
         * Public identifier for XHTML document type.
         */
        public static final String XHTMLPublicId =
            "-//W3C//DTD XHTML 1.0 Strict//EN";

        /**
         * System identifier for XHTML document type.
         */
        public static final String XHTMLSystemId =
            "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";

    }


    public static class Defaults
    {

        /**
         * If indentation is turned on, the default identation
         * level is 4.
         *
         * @see #setIndenting(boolean)
         */
        public static final int Indent = 4;

        /**
         * The default encoding for Web documents it UTF-8.
         *
         * @see #getEncoding()
         */
        public static final String Encoding = "UTF-8";

        /**
         * The default line width at which to break long lines
         * when identing. This is set to 72.
         */
        public static final int LineWidth = 72;

    }


    /**
     * Holds the output method specified for this document,
     * or null if no method was specified.
     */
    private String _method;


    /**
     * Specifies the version of the output method.
     */
    private String _version;


    /**
     * The indentation level, or zero if no indentation
     * was requested.
     */
    private int _indent = 0;


    /**
     * The encoding to use, if an input stream is used.
     * The default is always UTF-8.
     */
    private String _encoding = Defaults.Encoding;


    /**
     * The specified media type or null.
     */
    private String _mediaType;


    /**
     * The specified document type system identifier, or null.
     */
    private String _doctypeSystem;


    /**
     * The specified document type public identifier, or null.
     */
    private String _doctypePublic;


    /**
     * Ture if the XML declaration should be ommited;
     */
    private boolean _omitXmlDeclaration = false;


    /**
     * Ture if the DOCTYPE declaration should be ommited;
     */
    private boolean _omitDoctype = false;


    /**
<<<<<<< OutputFormat.java
     * Ture if comments should be ommited;
     */
    private boolean _omitComments = false;


    /**
     * Ture if the comments should be ommited;
     */
    private boolean _stripComments = false;


    /**
     * True if the document type should be marked as standalone.
     */
    private boolean _standalone = false;


    /**
     * List of element tag names whose text node children must
     * be output as CDATA.
     */
    private String[] _cdataElements;


    /**
     * List of element tag names whose text node children must
     * be output unescaped.
     */
    private String[] _nonEscapingElements;


    /**
     * The selected line separator.
     */
    private String _lineSeparator = LineSeparator.Web;


    /**
     * The line width at which to wrap long lines when indenting.
     */
    private int _lineWidth = Defaults.LineWidth;


    /**
     * True if spaces should be preserved in elements that do not
     * specify otherwise, or specify the default behavior.
     */
    private boolean _preserve = false;



    /**
     * Constructs a new output format with the default values.
     */
    public OutputFormat()
    {
    }


    /**
     * Constructs a new output format with the default values for
     * the specified method and encoding. If <tt>indent</tt>
     * is true, the document will be pretty printed with the default
     * indentation level and default line wrapping.
     *
     * @param method The specified output method
     * @param encoding The specified encoding
     * @param indenting True for pretty printing
     * @see #setEncoding
     * @see #setIndenting
     * @see #setMethod
     */
    public OutputFormat( String method, String encoding, boolean indenting )
    {
        setMethod( method );
        setEncoding( encoding );
        setIndenting( indenting );
    }


    /**
     * Constructs a new output format with the proper method,
     * document type identifiers and media type for the specified
     * document.
     *
     * @param doc The document to output
     * @see #whichMethod
     */
    public OutputFormat( Document doc )
    {
        setMethod( whichMethod( doc ) );
        setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
        setMediaType( whichMediaType( getMethod() ) );
    }


    /**
     * Constructs a new output format with the proper method,
     * document type identifiers and media type for the specified
     * document, and with the specified encoding. If <tt>indent</tt>
     * is true, the document will be pretty printed with the default
     * indentation level and default line wrapping.
     *
     * @param doc The document to output
     * @param encoding The specified encoding
     * @param indenting True for pretty printing
     * @see #setEncoding
     * @see #setIndenting
     * @see #whichMethod
     */
    public OutputFormat( Document doc, String encoding, boolean indenting )
    {
        this( doc );
        setEncoding( encoding );
        setIndenting( indenting );
    }


    /**
     * Returns the method specified for this output format.
     * Typically the method will be <tt>xml</tt>, <tt>html</tt>
     * or <tt>text</tt>, but it might be other values.
     * If no method was specified, null will be returned
     * and the most suitable method will be determined for
     * the document by calling {@link #whichMethod}.
     *
     * @return The specified output method, or null
     */
    public String getMethod()
    {
        return _method;
    }


    /**
     * Sets the method for this output format.
     *
     * @see #getMethod
     * @param method The output method, or null
     */
    public void setMethod( String method )
    {
        _method = method;
    }


    /**
     * Returns the version for this output method.
     * If no version was specified, will return null
     * and the default version number will be used.
     * If the serializerr does not support that particular
     * version, it should default to a supported version.
     *
     * @return The specified method version, or null
     */
    public String getVersion()
    {
        return _version;
    }


    /**
     * Sets the version for this output method.
     * For XML the value would be "1.0", for HTML
     * it would be "4.0".
     *
     * @see #getVersion
     * @param version The output method version, or null
     */
    public void setVersion( String version )
    {
        _version = version;
    }


    /**
     * Returns the indentation specified. If no indentation
     * was specified, zero is returned and the document
     * should not be indented.
     *
     * @return The indentation or zero
     * @see #setIndenting
     */
    public int getIndent()
    {
        return _indent;
    }


    /**
     * Returns true if indentation was specified.
     */
    public boolean getIndenting()
    {
        return ( _indent > 0 );
    }


    /**
     * Sets the indentation. The document will not be
     * indented if the indentation is set to zero.
     * Calling {@link #setIndenting} will reset this
     * value to zero (off) or the default (on).
     *
     * @param indent The indentation, or zero
     */
    public void setIndent( int indent )
    {
        if ( indent < 0 )
            _indent = 0;
        else
            _indent = indent;
    }


    /**
     * Sets the indentation on and off. When set on, the default
     * indentation level and default line wrapping is used
     * (see {@link #DEFAULT_INDENT} and {@link #DEFAULT_LINE_WIDTH}).
     * To specify a different indentation level or line wrapping,
     * use {@link #setIndent} and {@link #setLineWidth}.
     *
     * @param on True if indentation should be on
     */
    public void setIndenting( boolean on )
    {
        if ( on ) {
            _indent = Defaults.Indent;
            _lineWidth = Defaults.LineWidth;
        } else {
            _indent = 0;
            _lineWidth = 0;
        }
    }


    /**
     * Returns the specified encoding. If no encoding was
     * specified, the default is always "UTF-8".
     *
     * @return The encoding
     */
    public String getEncoding()
    {
        return _encoding;
    }


    /**
     * Sets the encoding for this output method. If no
     * encoding was specified, the default is always "UTF-8".
     * Make sure the encoding is compatible with the one
     * used by the {@link java.io.Writer}.
     *
     * @see #getEncoding
     * @param encoding The encoding, or null
     */
    public void setEncoding( String encoding )
    {
        _encoding = encoding;
    }


    /**
     * Returns the specified media type, or null.
     * To determine the media type based on the
     * document type, use {@link #whichMediaType}.
     *
     * @return The specified media type, or null
     */
    public String getMediaType()
    {
        return _mediaType;
    }


    /**
     * Sets the media type.
     *
     * @see #getMediaType
     * @param mediaType The specified media type
     */
    public void setMediaType( String mediaType )
    {
        _mediaType = mediaType;
    }


    /**
     * Sets the document type public and system identifiers.
     * Required only if the DOM Document or SAX events do not
     * specify the document type, and one must be present in
     * the serialized document. Any document type specified
     * by the DOM Document or SAX events will override these
     * values.
     *
     * @param publicId The public identifier, or null
     * @param systemId The system identifier, or null
     */
    public void setDoctype( String publicId, String systemId )
    {
        _doctypePublic = publicId;
        _doctypeSystem = systemId;
    }


    /**
     * Returns the specified document type public identifier,
     * or null.
     */
    public String getDoctypePublic()
    {
        return _doctypePublic;
    }


    /**
     * Returns the specified document type system identifier,
     * or null.
     */
    public String getDoctypeSystem()
    {
        return _doctypeSystem;
    }


    /**
     * Returns true if comments should be ommited.
     * The default is false.
     */
    public boolean getOmitComments()
    {
        return _omitComments;
    }


    /**
     * Sets comment omitting on and off.
     *
     * @param omit True if comments should be ommited
     */
    public void setOmitComments( boolean omit )
    {
        _omitComments = omit;
    }


    /**
     * Returns true if the DOCTYPE declaration should
     * be ommited. The default is false.
     */
    public boolean getOmitDocumentType()
    {
        return _omitDoctype;
    }


    /**
     * Sets DOCTYPE declaration omitting on and off.
     *
     * @param omit True if DOCTYPE declaration should be ommited
     */
    public void setOmitDocumentType( boolean omit )
    {
        _omitDoctype = omit;
    }


    /**
     * Returns true if the XML document declaration should
     * be ommited. The default is false.
     */
    public boolean getOmitXMLDeclaration()
    {
        return _omitXmlDeclaration;
    }


    /**
     * Sets XML declaration omitting on and off.
     *
     * @param omit True if XML declaration should be ommited
     */
    public void setOmitXMLDeclaration( boolean omit )
    {
        _omitXmlDeclaration = omit;
    }


    /**
     * Returns true if the document type is standalone.
     * The default is false.
     */
    public boolean getStandalone()
    {
        return _standalone;
    }


    /**
     * Sets document DTD standalone. The public and system
     * identifiers must be null for the document to be
     * serialized as standalone.
     *
     * @param standalone True if document DTD is standalone
     */
    public void setStandalone( boolean standalone )
    {
        _standalone = standalone;
    }


    /**
     * Returns a list of all the elements whose text node children
     * should be output as CDATA, or null if no such elements were
     * specified.
     */
    public String[] getCDataElements()
    {
        return _cdataElements;
    }


    /**
     * Returns true if the text node children of the given elements
     * should be output as CDATA.
     *
     * @param tagName The element's tag name
     * @return True if should serialize as CDATA
     */
    public boolean isCDataElement( String tagName )
    {
        int i;

        if ( _cdataElements == null )
            return false;
        for ( i = 0 ; i < _cdataElements.length ; ++i )
            if ( _cdataElements[ i ].equals( tagName ) )
                return true;
        return false;
    }


    /**
     * Sets the list of elements for which text node children
     * should be output as CDATA.
     *
     * @param cdataElements List of CDATA element tag names
     */
    public void setCDataElements( String[] cdataElements )
    {
        _cdataElements = cdataElements;
    }


    /**
     * Returns a list of all the elements whose text node children
     * should be output unescaped (no character references), or null
     * if no such elements were specified.
     */
    public String[] getNonEscapingElements()
    {
        return _nonEscapingElements;
    }


    /**
     * Returns true if the text node children of the given elements
     * should be output unescaped.
     *
     * @param tagName The element's tag name
     * @return True if should serialize unescaped
     */
    public boolean isNonEscapingElement( String tagName )
    {
        int i;

        if ( _nonEscapingElements == null )
            return false;
        for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
            if ( _nonEscapingElements[ i ].equals( tagName ) )
                return true;
        return false;
    }


    /**
     * Sets the list of elements for which text node children
     * should be output unescaped (no character references).
     *
     * @param nonEscapingElements List of unescaped element tag names
     */
    public void setNonEscapingElements( String[] nonEscapingElements )
    {
        _nonEscapingElements = nonEscapingElements;
    }



    /**
     * Returns a specific line separator to use. The default is the
     * Web line separator (<tt>\n</tt>). A string is returned to
     * support double codes (CR + LF).
     *
     * @return The specified line separator
     */
    public String getLineSeparator()
    {
        return _lineSeparator;
    }


    /**
     * Sets the line separator. The default is the Web line separator
     * (<tt>\n</tt>). The machine's line separator can be obtained
     * from the system property <tt>line.separator</tt>, but is only
     * useful if the document is edited on machines of the same type.
     * For general documents, use the Web line separator.
     *
     * @param lineSeparator The specified line separator
     */
    public void setLineSeparator( String lineSeparator )
    {
        if ( lineSeparator == null )
            _lineSeparator =  LineSeparator.Web;
        else
            _lineSeparator = lineSeparator;
    }


    /**
     * Returns true if the default behavior for this format is to
     * preserve spaces. All elements that do not specify otherwise
     * or specify the default behavior will be formatted based on
     * this rule. All elements that specify space preserving will
     * always preserve space.
     */
    public boolean getPreserveSpace()
    {
        return _preserve;
    }


    /**
     * Sets space preserving as the default behavior. The default is
     * space stripping and all elements that do not specify otherwise
     * or use the default value will not preserve spaces.
     *
     * @param preserve True if spaces should be preserved
     */
    public void setPreserveSpace( boolean preserve )
    {
        _preserve = preserve;
    }


    /**
     * Return the selected line width for breaking up long lines.
     * When indenting, and only when indenting, long lines will be
     * broken at space boundaries based on this line width.
     * No line wrapping occurs if this value is zero.
     */
    public int getLineWidth()
    {
        return _lineWidth;
    }


    /**
     * Sets the line width. If zero then no line wrapping will
     * occur. Calling {@link #setIndenting} will reset this
     * value to zero (off) or the default (on).
     *
     * @param lineWidth The line width to use, zero for default
     * @see #getLineWidth
     * @see #setIndenting
     */
    public void setLineWidth( int lineWidth )
    {
        if ( lineWidth <= 0 )
            _lineWidth = 0;
        else
            _lineWidth = lineWidth;
    }


    /**
     * Returns the last printable character based on the selected
     * encoding. Control characters and non-printable characters
     * are always printed as character references.
     */
    public char getLastPrintable()
    {
        if ( getEncoding() != null &&
             ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
            return 0xFF;
        else
            return 0xFFFF;
    }


    /**
     * Determine the output method for the specified document.
     * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
     * then the method is said to be <tt>html</tt>. If the root
     * element is 'html' and all text nodes preceding the root
     * element are all whitespace, then the method is said to be
     * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
     *
     * @param doc The document to check
     * @return The suitable method
     */
    public static String whichMethod( Document doc )
    {
        Node    node;
        String  value;
        int     i;

        // If document is derived from HTMLDocument then the default
        // method is html.
        if ( doc instanceof HTMLDocument )
            return Method.HTML;

        // Lookup the root element and the text nodes preceding it.
        // If root element is html and all text nodes contain whitespace
        // only, the method is html.

        // FIXME (SM) should we care about namespaces here?

        node = doc.getFirstChild();
        while (node != null) {
            // If the root element is html, the method is html.
            if ( node.getNodeType() == Node.ELEMENT_NODE ) {
                if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
                    return Method.HTML;
                } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
                    return Method.FOP;
                } else {
                    return Method.XML;
                }
            } else if ( node.getNodeType() == Node.TEXT_NODE ) {
                // If a text node preceding the root element contains
                // only whitespace, this might be html, otherwise it's
                // definitely xml.
                value = node.getNodeValue();
                for ( i = 0 ; i < value.length() ; ++i )
                    if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
                         value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
                        return Method.XML;
            }
            node = node.getNextSibling();
        }
        // Anything else, the method is xml.
        return Method.XML;
    }


    /**
     * Returns the document type public identifier
     * specified for this document, or null.
     */
    public static String whichDoctypePublic( Document doc )
    {
        DocumentType doctype;

        /* XXX  Delayed until DOM Level 2 is introduced into the code base
           doctype = doc.getDoctype();
           if ( doctype != null ) {
           // Note on catch: DOM Level 1 does not specify this method
           // and the code will throw a NoSuchMethodError
           try {
           return doctype.getPublicID();
           } catch ( Error except ) {  }
           }
        */
        if ( doc instanceof HTMLDocument )
            return DTD.XHTMLPublicId;
        return null;
    }


    /**
     * Returns the document type system identifier
     * specified for this document, or null.
     */
    public static String whichDoctypeSystem( Document doc )
    {
        DocumentType doctype;

        /* XXX  Delayed until DOM Level 2 is introduced into the code base
           doctype = doc.getDoctype();
           if ( doctype != null ) {
           // Note on catch: DOM Level 1 does not specify this method
           // and the code will throw a NoSuchMethodError
           try {
           return doctype.getSystemID();
           } catch ( Error except ) { }
           }
        */
        if ( doc instanceof HTMLDocument )
            return DTD.XHTMLSystemId;
        return null;
    }


    /**
     * Returns the suitable media format for a document
     * output with the specified method.
     */
    public static String whichMediaType( String method )
    {
        if ( method.equalsIgnoreCase( Method.XML ) )
            return "text/xml";
        if ( method.equalsIgnoreCase( Method.HTML ) )
            return "text/html";
        if ( method.equalsIgnoreCase( Method.XHTML ) )
            return "text/html";
        if ( method.equalsIgnoreCase( Method.TEXT ) )
            return "text/plain";
        if ( method.equalsIgnoreCase( Method.FOP ) )
            return "application/pdf";
        return null;
    }


}

