| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.axiom.util.stax.dialect; |
| |
| import javax.xml.stream.XMLInputFactory; |
| import javax.xml.stream.XMLOutputFactory; |
| import javax.xml.stream.XMLStreamConstants; |
| import javax.xml.stream.XMLStreamWriter; |
| |
| /** |
| * Encapsulates the specific characteristics of a particular StAX implementation. |
| * In particular, an implementation of this interface is able to wrap (if necessary) the |
| * readers and writers produced by the StAX implementation to make them conform to the |
| * StAX specifications. This is called <em>normalization</em>. |
| * <p> |
| * In addition to bugs in particular StAX implementations and clear violations of the StAX |
| * specifications, the following ambiguities and gray areas in the specifications are also addressed |
| * by the dialect implementations: |
| * <ul> |
| * <li>The specifications don't tell whether it is allowed to use a <code>null</code> value |
| * for the charset encoding parameter in the following methods: |
| * <ul> |
| * <li>{@link XMLOutputFactory#createXMLEventWriter(java.io.OutputStream, String)}</li> |
| * <li>{@link XMLOutputFactory#createXMLStreamWriter(java.io.OutputStream, String)}</li> |
| * <li>{@link javax.xml.stream.XMLStreamWriter#writeStartDocument(String, String)}</li> |
| * </ul> |
| * Some implementations accept <code>null</code> values, while others throw an exception. |
| * To make sure that code written to run with a normalized {@link XMLOutputFactory} remains |
| * portable, the dialect implementation normalizes the behavior of these methods so that they |
| * consistently throw an exception when called with a <code>null</code> encoding. Note that |
| * the type of exception to be thrown remains unspecified.</li> |
| * <li>The StAX specifications require that {@link javax.xml.stream.XMLStreamReader#getEncoding()} |
| * returns the "input encoding if known or <code>null</code> if unknown". This requirement |
| * is not precise enough to guarantee consistent behavior across different implementations. |
| * In order to provide the consumer of the stream reader with complete and unambiguous information about |
| * the encoding of the underlying stream, the dialect implementations normalize the |
| * behavior of the {@link javax.xml.stream.XMLStreamReader#getEncoding()} method such that |
| * it returns a non null value if and only if the reader was created from a byte stream, in |
| * which case the return value is the effective charset encoding used by the parser to |
| * decode the byte stream. According to the XML specifications, this value is determined |
| * by one of the following means: |
| * <ul> |
| * <li>The encoding was provided when the stream reader was created, i.e. as a parameter |
| * to the {@link javax.xml.stream.XMLInputFactory#createXMLStreamReader(java.io.InputStream, String)} |
| * method. This is referred to as "external encoding information" by the XML |
| * specifications.</li> |
| * <li>The encoding was specified by the XML encoding declaration.</li> |
| * <li>The encoding was detected using the first four bytes of the stream, as described |
| * in the appendix of the XML specifications on autodetection of character encodings.</li> |
| * </ul> |
| * </li> |
| * <li>According to the table shown in the documentation of the |
| * {@link javax.xml.stream.XMLStreamReader} class, calls to |
| * {@link javax.xml.stream.XMLStreamReader#getEncoding()}, |
| * {@link javax.xml.stream.XMLStreamReader#getVersion()}, |
| * {@link javax.xml.stream.XMLStreamReader#isStandalone()}, |
| * {@link javax.xml.stream.XMLStreamReader#standaloneSet()} and |
| * {@link javax.xml.stream.XMLStreamReader#getCharacterEncodingScheme()} are only allowed |
| * in the {@link javax.xml.stream.XMLStreamConstants#START_DOCUMENT} state. On the other |
| * hand, this requirement is not mentioned in the documentation of the individual methods |
| * and the majority of StAX implementations support calls to these methods in any state. |
| * However, to improve portability, the dialect implementations normalize these methods to |
| * throw an {@link IllegalStateException} if they are called in a state other than |
| * {@link javax.xml.stream.XMLStreamConstants#START_DOCUMENT}.</li> |
| * <li>The documentation of {@link javax.xml.stream.XMLStreamReader#isCharacters()} specifies |
| * that this method "returns true if the cursor points to a character data event". |
| * On the other hand, the documentation of {@link javax.xml.stream.XMLStreamReader} |
| * states that "parsing events are defined as the XML Declaration, a DTD, start tag, |
| * character data, white space, end tag, comment, or processing instruction" and thus |
| * makes a clear distinction between character data events and white space events. |
| * This means that {@link javax.xml.stream.XMLStreamReader#isCharacters()} should return |
| * <code>true</code> if and only if the current event is |
| * {@link javax.xml.stream.XMLStreamConstants#CHARACTERS}. This is the case for most parsers, |
| * but some return <code>true</code> for {@link javax.xml.stream.XMLStreamConstants#SPACE} |
| * events as well. Where necessary, the dialect implementations correct this behavior. |
| * </li> |
| * <li>It is not clear which methods other than {@link XMLStreamWriter#setPrefix(String, String)} |
| * and {@link XMLStreamWriter#setDefaultNamespace(String)} should update the namespace |
| * context maintained by the {@link XMLStreamWriter} when namespace repairing is disabled. |
| * In Woodstox and IBM's XL XP-J, only {@link XMLStreamWriter#writeNamespace(String, String)} |
| * and {@link XMLStreamWriter#writeDefaultNamespace(String)} do this. On the other hand, in |
| * BEA's reference implementation and in SJSXP, |
| * {@link XMLStreamWriter#writeStartElement(String, String, String)} also updates |
| * the namespace context (unless the given prefix is already bound to the namespace URI). |
| * The dialect implementations normalize the behavior such that only |
| * {@link XMLStreamWriter#writeNamespace(String, String)} and |
| * {@link XMLStreamWriter#writeDefaultNamespace(String)} update the namespace context. |
| * <p> |
| * Note that the statement about Woodstox doesn't apply to very old versions. |
| * Originally, Woodstox' {@link XMLStreamWriter#writeNamespace(String, String)} |
| * and {@link XMLStreamWriter#writeDefaultNamespace(String)} implementations didn't update |
| * the namespace context (as mentioned in |
| * <a href="http://markmail.org/message/olsdl3p3gciqqeob">this post</a> from 2006). This |
| * behavior <a href="http://markmail.org/thread/eoxprrkr2d2qoeqs">was changed in 2007</a>. |
| * Woodstox versions older than that are not supported. |
| * <p> |
| * Also note that as a corollary, if namespace repairing is disabled, it is mandatory |
| * to make the necessary calls to {@link XMLStreamWriter#writeNamespace(String, String)} |
| * and {@link XMLStreamWriter#writeDefaultNamespace(String)} in order to produce XML that |
| * is well formed with respect to namespaces, and it should therefore not be necessary to |
| * call {@link XMLStreamWriter#setPrefix(String, String)} or |
| * {@link XMLStreamWriter#setDefaultNamespace(String)} explicitly.</li> |
| * </ul> |
| * <p> |
| * Note that there are several ambiguities in the StAX specification which are not addressed by |
| * the different dialect implementations: |
| * <ul> |
| * <li>It is not clear whether {@link javax.xml.stream.XMLStreamReader#getAttributePrefix(int)} |
| * should return <code>null</code> or an empty string if the attribute doesn't have a |
| * prefix. Consistency with {@link javax.xml.stream.XMLStreamReader#getPrefix()} would |
| * imply that it should return <code>null</code>, but some implementations return an empty |
| * string.</li> |
| * <li>There is a contradiction in the documentation of the |
| * {@link javax.xml.stream.XMLStreamReader#next()} method about the exception that is thrown when |
| * this method is called after {@link javax.xml.stream.XMLStreamReader#hasNext()} returns |
| * false. It can either be {@link IllegalStateException} or |
| * {@link java.util.NoSuchElementException}. |
| * <p> |
| * Note that some implementations (including the reference implementation) throw an |
| * {@link javax.xml.stream.XMLStreamException} in this case. This is considered as a |
| * violation of the specifications because this exception should only be used |
| * "if there is an error processing the underlying XML source", which is not the case.</li> |
| * <li>An XML document may contain a namespace declaration such as {@code xmlns=""}. In this |
| * case, it is not clear if {@link javax.xml.stream.XMLStreamReader#getNamespaceURI(int)} |
| * should return <code>null</code> or an empty string.</li> |
| * <li>The documentation of {@link javax.xml.stream.XMLStreamWriter#setPrefix(String, String)} |
| * and {@link javax.xml.stream.XMLStreamWriter#setDefaultNamespace(String)} requires that |
| * the namespace "is bound in the scope of the current START_ELEMENT / END_ELEMENT pair". |
| * The meaning of this requirement is clear in the context of an element written using |
| * the <code>writeStartElement</code> and <code>writeEndElement</code> methods. On the |
| * other hand, the requirement is ambiguous in the context of an element written using |
| * <code>writeEmptyElement</code> and there are two competing interpretations: |
| * <ol> |
| * <li>Since the element is empty, it doesn't define a nested scope and the namespace |
| * should be bound in the scope of the enclosing element.</li> |
| * <li>An invocation of one of the <code>writeEmptyElement</code> methods actually |
| * doesn't write a complete element because it can be followed by invocations |
| * of <code>writeAttribute</code>, <code>writeNamespace</code> or |
| * <code>writeDefaultNamespace</code>. The element is only completed by a |
| * call to a <code>write</code> method other than the aforementioned methods. |
| * An element written using <code>writeEmptyElement</code> therefore also |
| * defines a scope and the namespace should be bound in that scope.</li> |
| * </ol> |
| * While the second interpretation seems to be more consistent, it would introduce another |
| * ambiguity for the following sequence of calls: <code>writeEmptyElement</code>, |
| * <code>writeAttribute</code>, <code>setPrefix</code>, <code>writeCharacters</code>. |
| * In this case, it is not clear if the scope of the empty element should end at the call to |
| * <code>writeAttribute</code> or <code>writeCharacters</code>. |
| * <p> |
| * Because of these ambiguities, the dialect implementations don't attempt to normalize the |
| * behavior of {@link javax.xml.stream.XMLStreamWriter#setPrefix(String, String)} |
| * and {@link javax.xml.stream.XMLStreamWriter#setDefaultNamespace(String)} in this particular |
| * context, and their usage in conjunction with <code>writeEmptyElement</code> should be |
| * avoided. |
| * </li> |
| * </ul> |
| */ |
public interface StAXDialect {
    /**
     * Get the name of this dialect.
     *
     * @return the name of the dialect
     */
    String getName();

    /**
     * Configure the given factory to enable reporting of CDATA sections by stream readers created
     * from it. The example in the documentation of the
     * {@link javax.xml.stream.XMLStreamReader#next()} method suggests that even if the parser is
     * non-coalescing, CDATA sections should be reported as CHARACTERS events. Some implementations
     * strictly follow the example, while for others it is sufficient to make the parser
     * non-coalescing.
     *
     * @param factory
     *            the factory to configure; this may be an already normalized factory or a "raw"
     *            factory object
     * @return the factory with CDATA reporting enabled; this may be the original factory instance
     *         or a wrapper
     * @throws UnsupportedOperationException
     *             if reporting of CDATA sections is not supported
     */
    XMLInputFactory enableCDataReporting(XMLInputFactory factory);

    /**
     * Configure the given factory to disallow DOCTYPE declarations. The effect of this is similar
     * to the {@code http://apache.org/xml/features/disallow-doctype-decl} feature in Xerces. The
     * factory instance returned by this method MUST satisfy the following requirements:
     * <ul>
     * <li>The factory or the reader implementation MUST throw an exception when requested to parse
     * a document containing a DOCTYPE declaration. If the exception is not thrown by the factory,
     * it MUST be thrown by the reader before the first {@link XMLStreamConstants#START_ELEMENT}
     * event.</li>
     * <li>The parser MUST NOT attempt to load the external DTD subset or any other external
     * entity.</li>
     * <li>The parser MUST protect itself against denial of service attacks based on deeply nested
     * entity definitions present in the internal DTD subset. Ideally, the parser SHOULD NOT process
     * the internal subset at all and throw an exception immediately when encountering the DOCTYPE
     * declaration.</li>
     * </ul>
     * This method is typically useful in the context of SOAP processing since a SOAP message must
     * not contain a Document Type Declaration.
     *
     * @param factory
     *            the factory to configure; this may be an already normalized factory or a "raw"
     *            factory object
     * @return the factory that disallows DOCTYPE declarations; this may be the original factory
     *         instance or a wrapper
     */
    XMLInputFactory disallowDoctypeDecl(XMLInputFactory factory);

    /**
     * Make an {@link XMLInputFactory} object thread safe. The implementation may do this either by
     * configuring the factory or by creating a thread safe wrapper. The returned factory must be
     * thread safe for all method calls that don't change the (visible) state of the factory. This
     * means that thread safety is not required for
     * {@link XMLInputFactory#setEventAllocator(javax.xml.stream.util.XMLEventAllocator)},
     * {@link XMLInputFactory#setProperty(String, Object)},
     * {@link XMLInputFactory#setXMLReporter(javax.xml.stream.XMLReporter)} and
     * {@link XMLInputFactory#setXMLResolver(javax.xml.stream.XMLResolver)}.
     *
     * @param factory
     *            the factory to make thread safe
     * @return the thread safe factory
     */
    XMLInputFactory makeThreadSafe(XMLInputFactory factory);

    /**
     * Make an {@link XMLOutputFactory} object thread safe. The implementation may do this either by
     * configuring the factory or by creating a thread safe wrapper. The returned factory must be
     * thread safe for all method calls that don't change the (visible) state, i.e. the properties,
     * of the factory.
     *
     * @param factory
     *            the factory to make thread safe
     * @return the thread safe factory
     * @deprecated This method is deprecated and should not be used in new code; this interface
     *             does not document a replacement.
     */
    @Deprecated
    XMLOutputFactory makeThreadSafe(XMLOutputFactory factory);

    /**
     * Normalize an {@link XMLInputFactory}. This will make sure that the readers created from the
     * factory conform to the StAX specifications.
     *
     * @param factory
     *            the factory to normalize
     * @return the normalized factory
     */
    XMLInputFactory normalize(XMLInputFactory factory);

    /**
     * Normalize an {@link XMLOutputFactory}. This will make sure that the writers created from the
     * factory conform to the StAX specifications.
     *
     * @param factory
     *            the factory to normalize
     * @return the normalized factory
     * @deprecated This method is deprecated and should not be used in new code; this interface
     *             does not document a replacement.
     */
    @Deprecated
    XMLOutputFactory normalize(XMLOutputFactory factory);
}