blob: bac0709e73babce8c7131b1a7f52cba03d0cb153 [file] [log] [blame]
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000,2001 The Apache Software Foundation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.xerces.impl;
import java.io.EOFException;
import java.io.IOException;
import java.util.Stack;
import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.impl.XMLEntityScanner;
import org.apache.xerces.impl.XMLErrorReporter;
import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.util.SymbolTable;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponent;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentScanner;
import org.apache.xerces.xni.parser.XMLDTDScanner;
import org.apache.xerces.xni.parser.XMLInputSource;
/**
* This class is responsible for scanning XML document structure
* and content. The scanner acts as the source for the document
* information which is communicated to the document handler.
* <p>
* This component requires the following features and properties from the
* component manager that uses it:
* <ul>
* <li>http://xml.org/sax/features/namespaces</li>
* <li>http://xml.org/sax/features/validation</li>
* <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
* <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
* <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
* <li>http://apache.org/xml/properties/internal/symbol-table</li>
* <li>http://apache.org/xml/properties/internal/error-reporter</li>
* <li>http://apache.org/xml/properties/internal/entity-manager</li>
* <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
* </ul>
*
* @author Glenn Marcy, IBM
* @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author Eric Ye, IBM
*
* @version $Id$
*/
public class XMLDocumentScannerImpl
extends XMLDocumentFragmentScannerImpl {
//
// Constants
//
// scanner states
// NOTE(review): the numeric values below presumably are chosen so that they
// do not collide with the scanner states declared by the superclass
// (e.g. SCANNER_STATE_CONTENT, SCANNER_STATE_PI) -- confirm against
// XMLDocumentFragmentScannerImpl.
/** Scanner state: XML declaration. This is the state installed by reset(). */
protected static final int SCANNER_STATE_XML_DECL = 0;
/** Scanner state: prolog. Entered once the XMLDecl dispatcher has run. */
protected static final int SCANNER_STATE_PROLOG = 5;
/** Scanner state: trailing misc. Entered when the content dispatcher hands over. */
protected static final int SCANNER_STATE_TRAILING_MISC = 12;
/** Scanner state: DTD internal declarations (doctype has an internal subset). */
protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 17;
/** Scanner state: open DTD external subset (resolve it and hand it to the DTD scanner). */
protected static final int SCANNER_STATE_DTD_EXTERNAL = 18;
/** Scanner state: DTD external declarations (external subset is being scanned). */
protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 19;
// feature identifiers
/** Feature identifier: load external DTD. */
protected static final String LOAD_EXTERNAL_DTD =
Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;
// property identifiers
/** Property identifier: DTD scanner. */
protected static final String DTD_SCANNER =
Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;
// recognized features and properties
/**
 * Recognized features; this is the list handed out by getRecognizedFeatures().
 * All identifiers except LOAD_EXTERNAL_DTD are declared outside this class.
 */
private static final String[] RECOGNIZED_FEATURES = {
NAMESPACES,
VALIDATION,
LOAD_EXTERNAL_DTD,
NOTIFY_BUILTIN_REFS,
NOTIFY_CHAR_REFS,
};
/**
 * Recognized properties; this is the list handed out by getRecognizedProperties().
 * All identifiers except DTD_SCANNER are declared outside this class.
 */
private static final String[] RECOGNIZED_PROPERTIES = {
SYMBOL_TABLE,
ERROR_REPORTER,
ENTITY_MANAGER,
DTD_SCANNER,
};
//
// Data
//
// properties
/**
 * DTD scanner. Fetched from the component manager in reset() and replaced
 * through setProperty(); the DTD dispatcher delegates subset scanning to it.
 */
protected XMLDTDScanner fDTDScanner;
// protected data
/** Scanning DTD. Cleared in reset(); not otherwise written by this class. */
protected boolean fScanningDTD;
// other info
/** Doctype name (root element type), as scanned by scanDoctypeDecl(). */
protected String fDoctypeName;
/** Doctype declaration public identifier; null until an external id is scanned. */
protected String fDoctypePublicId;
/** Doctype declaration system identifier; null until an external id is scanned. */
protected String fDoctypeSystemId;
// features
/** Load external DTD. Defaults to true when the feature cannot be queried. */
protected boolean fLoadExternalDTD = true;
// state
/** Seen doctype declaration. Used to report a second DOCTYPE in the prolog. */
protected boolean fSeenDoctypeDecl;
// dispatchers
/** XML declaration dispatcher. Installed by reset() as the first dispatcher. */
protected Dispatcher fXMLDeclDispatcher = new XMLDeclDispatcher();
/** Prolog dispatcher. */
protected Dispatcher fPrologDispatcher = new PrologDispatcher();
/** DTD dispatcher. */
protected Dispatcher fDTDDispatcher = new DTDDispatcher();
/** Trailing miscellaneous section dispatcher. */
protected Dispatcher fTrailingMiscDispatcher = new TrailingMiscDispatcher();
// temporary variables
/**
 * Array of 3 strings. scanDoctypeDecl() passes it to scanExternalID() and
 * then reads the system id from index 0 and the public id from index 1.
 */
private String[] fStrings = new String[3];
/** String. Scratch XMLString handed to scanPIData() by the XMLDecl dispatcher. */
private XMLString fString = new XMLString();
/** String buffer. Used to assemble the target of a PI whose name starts with "xml". */
private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
//
// Constructors
//
/**
 * Creates a document scanner. The constructor body is empty; the scanner
 * state is initialized by {@link #reset(XMLComponentManager)}.
 */
public XMLDocumentScannerImpl() {
    // intentionally empty
} // <init>()
//
// XMLDocumentScanner methods
//
/**
 * Sets the input source of the document to scan. This scanner is
 * registered as the entity handler of the entity manager, the document
 * entity is started, and the expanded system identifier of the input
 * source is remembered as the document system identifier.
 *
 * @param inputSource The input source.
 *
 * @throws IOException Thrown on i/o error.
 */
public void setInputSource(XMLInputSource inputSource) throws IOException {

    // register for entity callbacks before the document entity starts
    fEntityManager.setEntityHandler(this);
    fEntityManager.startDocumentEntity(inputSource);

    // remember where the document came from
    final String documentSystemId = inputSource.getSystemId();
    fDocumentSystemId = fEntityManager.expandSystemId(documentSystemId);

} // setInputSource(XMLInputSource)
//
// XMLComponent methods
//
/**
 * Resets the component. After delegating to the superclass, the doctype
 * information is cleared, the "load external DTD" feature and the DTD
 * scanner property are read from the component manager, and the scanner
 * is put back into the XML declaration state.
 *
 * @param componentManager The component manager.
 *
 * @throws XMLConfigurationException Thrown by the superclass reset or by
 *                      the component manager, e.g. when the DTD scanner
 *                      property cannot be obtained. A failure to read the
 *                      "load external DTD" feature is not propagated; the
 *                      feature then defaults to true.
 */
public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {

    super.reset(componentManager);

    // forget the doctype of any previously scanned document
    fSeenDoctypeDecl = false;
    fDoctypeSystemId = null;
    fDoctypePublicId = null;
    fDoctypeName = null;

    // xerces features
    boolean loadExternalDTD = true;
    try {
        loadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD);
    }
    catch (XMLConfigurationException ignored) {
        // feature not available: keep the default of loading the external DTD
    }
    fLoadExternalDTD = loadExternalDTD;

    // xerces properties
    fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER);

    // initialize vars
    fScanningDTD = false;

    // scanning always starts by looking for the XML declaration
    setScannerState(SCANNER_STATE_XML_DECL);
    setDispatcher(fXMLDeclDispatcher);

} // reset(XMLComponentManager)
/**
 * Returns a list of feature identifiers that are recognized by
 * this component.
 * <p>
 * A fresh copy of the internal list is returned on every call so that
 * a caller modifying the returned array cannot alter the set of
 * features recognized by every scanner instance.
 *
 * @return A new array holding the recognized feature identifiers;
 *         never null for this component.
 */
public String[] getRecognizedFeatures() {
    // defensive copy: RECOGNIZED_FEATURES is shared, static state
    return (String[])RECOGNIZED_FEATURES.clone();
} // getRecognizedFeatures():String[]
/**
 * Sets the state of a feature. The call is first forwarded to the
 * superclass; afterwards the "load external DTD" feature, if that is
 * the one being set, is recorded by this scanner. Any other feature
 * is ignored here.
 *
 * @param featureId The feature identifier.
 * @param state     The state of the feature.
 *
 * @throws XMLConfigurationException Declared for the superclass call;
 *                      this method itself does not raise it.
 */
public void setFeature(String featureId, boolean state)
    throws XMLConfigurationException {

    super.setFeature(featureId, state);

    // Xerces features
    if (featureId.equals(LOAD_EXTERNAL_DTD)) {
        fLoadExternalDTD = state;
    }

} // setFeature(String,boolean)
/**
 * Returns a list of property identifiers that are recognized by
 * this component.
 * <p>
 * A fresh copy of the internal list is returned on every call so that
 * a caller modifying the returned array cannot alter the set of
 * properties recognized by every scanner instance.
 *
 * @return A new array holding the recognized property identifiers;
 *         never null for this component.
 */
public String[] getRecognizedProperties() {
    // defensive copy: RECOGNIZED_PROPERTIES is shared, static state
    return (String[])RECOGNIZED_PROPERTIES.clone();
} // getRecognizedProperties():String[]
/**
 * Sets the value of a property. The call is first forwarded to the
 * superclass; afterwards the DTD scanner property, if that is the one
 * being set, replaces the DTD scanner used by this component. Any
 * other property is ignored here.
 *
 * @param propertyId The property identifier.
 * @param value      The value of the property.
 *
 * @throws XMLConfigurationException Declared for the superclass call;
 *                      this method itself does not raise it.
 */
public void setProperty(String propertyId, Object value)
    throws XMLConfigurationException {

    super.setProperty(propertyId, value);

    // Xerces properties
    if (propertyId.equals(DTD_SCANNER)) {
        fDTDScanner = (XMLDTDScanner)value;
    }

} // setProperty(String,Object)
//
// XMLEntityHandler methods
//
/**
 * This method notifies of the start of an entity. The document entity
 * is announced under the pseudo-name "[xml]"; for it the document
 * handler, if any, receives the startDocument callback. For every
 * other entity that the entity scanner reports as external, the
 * scanner is switched to the text declaration state.
 *
 * @param name     The name of the entity.
 * @param publicId The public identifier of the entity if the entity
 *                 is external, null otherwise.
 * @param systemId The system identifier of the entity if the entity
 *                 is external, null otherwise.
 * @param baseSystemId The base system identifier of the entity if
 *                     the entity is external, null otherwise.
 * @param encoding The auto-detected IANA encoding name of the entity
 *                 stream. This value will be null in those situations
 *                 where the entity encoding is not auto-detected (e.g.
 *                 internal entities or a document entity that is
 *                 parsed from a java.io.Reader).
 *
 * @throws XNIException Thrown by handler to signal an error.
 */
public void startEntity(String name,
                        String publicId, String systemId,
                        String baseSystemId,
                        String encoding) throws XNIException {

    super.startEntity(name, publicId, systemId, baseSystemId, encoding);

    final boolean isDocumentEntity = name.equals("[xml]");

    if (isDocumentEntity) {
        // call handler
        if (fDocumentHandler != null) {
            fDocumentHandler.startDocument(fEntityScanner, encoding, fAugmentations);
        }
    }
    else if (fEntityScanner.isExternal()) {
        // prepare to look for a TextDecl if external general entity
        setScannerState(SCANNER_STATE_TEXT_DECL);
    }

} // startEntity(String,String,String,String,String)
/**
 * This method notifies the end of an entity. After the superclass has
 * been notified, the end of the document entity (pseudo-name "[xml]")
 * is forwarded to the document handler as the endDocument callback.
 *
 * @param name The name of the entity.
 *
 * @throws XNIException Thrown by handler to signal an error.
 */
public void endEntity(String name) throws XNIException {

    super.endEntity(name);

    // call handler
    if (fDocumentHandler != null && name.equals("[xml]")) {
        fDocumentHandler.endDocument(fAugmentations);
    }

} // endEntity(String)
//
// Protected methods
//
// dispatcher factory methods
/**
 * Creates the dispatcher used to scan document content.
 *
 * @return A new instance of this class's {@code ContentDispatcher}.
 */
protected Dispatcher createContentDispatcher() {
    final Dispatcher contentDispatcher = new ContentDispatcher();
    return contentDispatcher;
} // createContentDispatcher():Dispatcher
// scanning methods
/**
 * Scans a doctype declaration up to, but not including, the internal
 * subset. The root element name and the optional external identifier
 * are stored in the doctype fields and reported to the document
 * handler through the doctypeDecl callback.
 *
 * @return True if the declaration has an internal subset (a '[' was
 *         scanned); false if the declaration was closed by '>' without
 *         one, in which case the markup depth is decremented here.
 *
 * @throws IOException  Thrown on i/o error.
 * @throws XNIException Thrown on parse error.
 */
protected boolean scanDoctypeDecl() throws IOException, XNIException {

    // whitespace is required in front of the root element name
    final boolean sawLeadingSpace = fEntityScanner.skipSpaces();
    if (!sawLeadingSpace) {
        reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL",
                         null);
    }

    // root element name
    fDoctypeName = fEntityScanner.scanName();
    if (fDoctypeName == null) {
        reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null);
    }

    // external id; only attempted when whitespace follows the name
    if (fEntityScanner.skipSpaces()) {
        scanExternalID(fStrings, false);
        fDoctypeSystemId = fStrings[0];
        fDoctypePublicId = fStrings[1];
        fEntityScanner.skipSpaces();
    }
    fHasExternalDTD = (fDoctypeSystemId != null);

    // call handler
    // NOTE: The callback is made before the internal subset has been
    //       parsed because SAX2 needs the root element name and the
    //       public and system identifiers for its startDTD call.
    if (fDocumentHandler != null) {
        fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId,
                                     fAugmentations);
    }

    // an opening bracket means an internal subset follows
    if (fEntityScanner.skipChar('[')) {
        return true;
    }

    // no internal subset: the declaration must end right here
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('>')) {
        reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
    }
    fMarkupDepth--;
    return false;

} // scanDoctypeDecl():boolean
//
// Protected methods (continued)
//
/**
 * Returns the scanner state name.
 *
 * @param state The scanner state.
 *
 * @return The name of the constant for the states declared by this
 *         class; for any other value the answer of the superclass.
 */
protected String getScannerStateName(int state) {

    final String stateName;
    switch (state) {
        case SCANNER_STATE_XML_DECL:
            stateName = "SCANNER_STATE_XML_DECL";
            break;
        case SCANNER_STATE_PROLOG:
            stateName = "SCANNER_STATE_PROLOG";
            break;
        case SCANNER_STATE_TRAILING_MISC:
            stateName = "SCANNER_STATE_TRAILING_MISC";
            break;
        case SCANNER_STATE_DTD_INTERNAL_DECLS:
            stateName = "SCANNER_STATE_DTD_INTERNAL_DECLS";
            break;
        case SCANNER_STATE_DTD_EXTERNAL:
            stateName = "SCANNER_STATE_DTD_EXTERNAL";
            break;
        case SCANNER_STATE_DTD_EXTERNAL_DECLS:
            stateName = "SCANNER_STATE_DTD_EXTERNAL_DECLS";
            break;
        default:
            // not one of ours: let the superclass name it
            stateName = super.getScannerStateName(state);
            break;
    }
    return stateName;

} // getScannerStateName(int):String
//
// Classes
//
/**
 * Dispatcher to handle XMLDecl scanning. It runs once at the very
 * start of the document and always hands over to the prolog
 * dispatcher.
 *
 * @author Andy Clark, IBM
 */
protected final class XMLDeclDispatcher
    implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return Always true: there is more to dispatch from the
     *         prolog dispatcher.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */
    public boolean dispatch(boolean complete)
        throws IOException, XNIException {

        // the prolog comes next whether or not the document
        // starts with an XMLDecl
        setScannerState(SCANNER_STATE_PROLOG);
        setDispatcher(fPrologDispatcher);

        try {
            // no XMLDecl: the prolog dispatcher picks up from here
            if (!fEntityScanner.skipString("<?xml")) {
                return true;
            }
            fMarkupDepth++;

            // standard XML declaration
            if (!XMLChar.isName(fEntityScanner.peekChar())) {
                scanXMLDeclOrTextDecl(false);
                return true;
            }

            // NOTE: special case where document starts with a PI
            //       whose name starts with "xml" (e.g. "xmlfoo")
            fStringBuffer.clear();
            fStringBuffer.append("xml");
            while (XMLChar.isName(fEntityScanner.peekChar())) {
                fStringBuffer.append((char)fEntityScanner.scanChar());
            }
            final String piTarget =
                fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
            scanPIData(piTarget, fString);
        }
        // premature end of file
        catch (EOFException e) {
            reportFatalError("PrematureEOF", null);
            throw e;
        }

        return true;

    } // dispatch(boolean):boolean

} // class XMLDeclDispatcher
/**
 * Dispatcher to handle prolog scanning: comments, processing
 * instructions and the doctype declaration that may precede the root
 * element. Control is handed to the DTD dispatcher when a doctype
 * declaration needs its subsets scanned, and to the content
 * dispatcher when the root element start tag is found.
 *
 * @author Andy Clark, IBM
 */
protected final class PrologDispatcher
    implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch either from this
     *         or a another dispatcher.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */
    public boolean dispatch(boolean complete)
        throws IOException, XNIException {

        try {
            boolean again;
            do {
                again = false;
                switch (fScannerState) {
                    case SCANNER_STATE_PROLOG: {
                        // if we're here then we're past the prolog decl!
                        fEntityManager.fCurrentEntity.mayReadChunks = true;
                        fEntityScanner.skipSpaces();
                        if (fEntityScanner.skipChar('<')) {
                            setScannerState(SCANNER_STATE_START_OF_MARKUP);
                            again = true;
                        }
                        else if (fEntityScanner.skipChar('&')) {
                            setScannerState(SCANNER_STATE_REFERENCE);
                            again = true;
                        }
                        else {
                            setScannerState(SCANNER_STATE_CONTENT);
                            again = true;
                        }
                        break;
                    }
                    case SCANNER_STATE_START_OF_MARKUP: {
                        fMarkupDepth++;
                        if (fEntityScanner.skipChar('?')) {
                            setScannerState(SCANNER_STATE_PI);
                            again = true;
                        }
                        else if (fEntityScanner.skipChar('!')) {
                            if (fEntityScanner.skipChar('-')) {
                                if (!fEntityScanner.skipChar('-')) {
                                    reportFatalError("InvalidCommentStart",
                                                     null);
                                }
                                setScannerState(SCANNER_STATE_COMMENT);
                                again = true;
                            }
                            else if (fEntityScanner.skipString("DOCTYPE")) {
                                setScannerState(SCANNER_STATE_DOCTYPE);
                                again = true;
                            }
                            else {
                                reportFatalError("MarkupNotRecognizedInProlog",
                                                 null);
                            }
                        }
                        else if (XMLChar.isNameStart(fEntityScanner.peekChar())) {
                            // start tag of the root element: content scanning takes over
                            setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                            setDispatcher(fContentDispatcher);
                            return true;
                        }
                        else {
                            reportFatalError("MarkupNotRecognizedInProlog",
                                             null);
                        }
                        break;
                    }
                    case SCANNER_STATE_COMMENT: {
                        scanComment();
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_PI: {
                        scanPI();
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_DOCTYPE: {
                        if (fSeenDoctypeDecl) {
                            reportFatalError("AlreadySeenDoctype", null);
                        }
                        fSeenDoctypeDecl = true;

                        // internal subset present: the DTD dispatcher scans it
                        if (scanDoctypeDecl()) {
                            setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
                            setDispatcher(fDTDDispatcher);
                            return true;
                        }

                        // no internal subset: go to the external subset, but
                        // only when validating or when loading of the external
                        // DTD is enabled -- the same condition the DTD
                        // dispatcher applies after an internal subset
                        if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) {
                            setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                            setDispatcher(fDTDDispatcher);
                            return true;
                        }

                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_CONTENT: {
                        reportFatalError("ContentIllegalInProlog", null);
                        // skip the offending character and resume prolog
                        // scanning; without the break this case would fall
                        // through and report a reference error as well
                        fEntityScanner.scanChar();
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInProlog", null);
                        // the '&' has already been consumed; go back to
                        // prolog scanning so that the same error is not
                        // reported over and over if parsing continues
                        setScannerState(SCANNER_STATE_PROLOG);
                        break;
                    }
                }
            } while (complete || again);

            // NOTE(review): the loop above can only be left normally when
            //               "complete" is false, so this branch does not
            //               appear to be reachable as written.
            if (complete) {
                if (fEntityScanner.scanChar() != '<') {
                    reportFatalError("RootElementRequired", null);
                }
                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                setDispatcher(fContentDispatcher);
            }
        }
        // premature end of file
        catch (EOFException e) {
            reportFatalError("PrematureEOF", null);
            throw e;
        }

        return true;

    } // dispatch(boolean):boolean

} // class PrologDispatcher
/**
 * Dispatcher to handle the internal and external DTD subsets. The
 * actual scanning is delegated to the DTD scanner; while it runs, the
 * entity manager's entity handler is set to null and it is set back
 * to the enclosing document scanner before this dispatcher returns.
 *
 * @author Andy Clark, IBM
 */
protected final class DTDDispatcher
    implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch either from this
     *         or a another dispatcher.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error, or when the
     *                      dispatcher is entered in a scanner state
     *                      it does not handle.
     */
    public boolean dispatch(boolean complete)
        throws IOException, XNIException {

        fEntityManager.setEntityHandler(null);
        try {
            boolean repeat;
            do {
                repeat = false;
                switch (fScannerState) {
                    case SCANNER_STATE_DTD_INTERNAL_DECLS: {
                        // REVISIT: Should there be a feature for
                        //          the "complete" parameter?
                        final boolean scanWholeSubset = true;
                        if (fDTDScanner.scanDTDInternalSubset(scanWholeSubset, fStandalone, fHasExternalDTD)) {
                            // the internal subset is not finished yet
                            break;
                        }

                        // end doctype declaration
                        if (!fEntityScanner.skipChar(']')) {
                            reportFatalError("EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET",
                                             null);
                        }
                        fEntityScanner.skipSpaces();
                        if (!fEntityScanner.skipChar('>')) {
                            reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
                        }
                        fMarkupDepth--;

                        final boolean externalSubsetWanted =
                            fDoctypeSystemId != null && (fValidation || fLoadExternalDTD);
                        if (!externalSubsetWanted) {
                            // break out of here
                            setScannerState(SCANNER_STATE_PROLOG);
                            setDispatcher(fPrologDispatcher);
                            fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
                            return true;
                        }

                        // scan external subset next
                        setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                        break;
                    }
                    case SCANNER_STATE_DTD_EXTERNAL: {
                        // resolve the external subset and hand it to the DTD scanner
                        final XMLInputSource externalSubset =
                            fEntityManager.resolveEntity(fDoctypePublicId, fDoctypeSystemId, fDocumentSystemId);
                        fDTDScanner.setInputSource(externalSubset);
                        setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                        repeat = true;
                        break;
                    }
                    case SCANNER_STATE_DTD_EXTERNAL_DECLS: {
                        // REVISIT: Should there be a feature for
                        //          the "complete" parameter?
                        final boolean scanWholeSubset = true;
                        if (fDTDScanner.scanDTDExternalSubset(scanWholeSubset)) {
                            // the external subset is not finished yet
                            break;
                        }

                        // DTD is done: back to the prolog
                        setScannerState(SCANNER_STATE_PROLOG);
                        setDispatcher(fPrologDispatcher);
                        fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
                        return true;
                    }
                    default: {
                        throw new XNIException("DTDDispatcher#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')');
                    }
                }
            } while (complete || repeat);
        }
        // premature end of file
        catch (EOFException e) {
            reportFatalError("PrematureEOF", null);
            throw e;
        }
        // cleanup
        finally {
            fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
        }

        return true;

    } // dispatch(boolean):boolean

} // class DTDDispatcher
/**
 * Dispatcher to handle content scanning. It specializes the fragment
 * content dispatcher through its hook methods so that a complete
 * document, rather than a document fragment, is scanned.
 *
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 */
protected final class ContentDispatcher
    extends FragmentContentDispatcher {

    //
    // Protected methods
    //

    // hooks

    // NOTE: These hook methods are added so that the full document
    //       scanner can share the majority of code with this class.

    /**
     * Scan for DOCTYPE hook. This method is a hook for subclasses
     * to add code to handle scanning for the "DOCTYPE" string
     * after the string "<!" has been scanned.
     *
     * @return True if the "DOCTYPE" was scanned, in which case the
     *         scanner state has been switched to the doctype state;
     *         false if "DOCTYPE" was not scanned.
     */
    protected boolean scanForDoctypeHook()
        throws IOException, XNIException {

        if (!fEntityScanner.skipString("DOCTYPE")) {
            return false;
        }
        setScannerState(SCANNER_STATE_DOCTYPE);
        return true;

    } // scanForDoctypeHook():boolean

    /**
     * Element depth is zero. This method is a hook for subclasses
     * to add code to handle when the element depth hits zero. When
     * scanning a document fragment, an element depth of zero is
     * normal. However, when scanning a full XML document, the
     * scanner must handle the trailing miscellaneous section of
     * the document after the end of the document's root element.
     *
     * @return Always true here: the caller should stop and return
     *         true so that the scanner can switch to the trailing
     *         miscellaneous dispatcher installed by this method.
     */
    protected boolean elementDepthIsZeroHook()
        throws IOException, XNIException {

        // the root element is closed: only trailing misc may follow
        setScannerState(SCANNER_STATE_TRAILING_MISC);
        setDispatcher(fTrailingMiscDispatcher);
        return true;

    } // elementDepthIsZeroHook():boolean

    /**
     * Scan for root element hook. This method is a hook for
     * subclasses to add code that handles scanning for the root
     * element. When scanning a document fragment, there is no
     * "root" element. However, when scanning a full XML document,
     * the scanner must handle the root element specially.
     *
     * @return True if the caller should stop and return true which
     *         allows the scanner to switch to a new scanning
     *         dispatcher; this happens when scanStartElement()
     *         returns true. A return value of false indicates that
     *         the content dispatcher should continue as normal.
     */
    protected boolean scanRootElementHook()
        throws IOException, XNIException {

        if (!scanStartElement()) {
            return false;
        }
        setScannerState(SCANNER_STATE_TRAILING_MISC);
        setDispatcher(fTrailingMiscDispatcher);
        return true;

    } // scanRootElementHook():boolean

    /**
     * End of file hook. This method is a hook for subclasses to
     * add code that handles the end of file. The end of file in
     * a document fragment is OK if the markup depth is zero.
     * However, when scanning a full XML document, an end of file
     * is always premature: it is reported as a fatal error and the
     * exception is rethrown.
     *
     * @param e The end-of-file exception that was caught.
     */
    protected void endOfFileHook(EOFException e)
        throws IOException, XNIException {

        reportFatalError("PrematureEOF", null);
        throw e;

    } // endOfFileHook()

} // class ContentDispatcher
/**
 * Dispatcher to handle trailing miscellaneous section scanning, i.e.
 * the comments, processing instructions and white space that may
 * follow the root element.
 *
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 */
protected final class TrailingMiscDispatcher
    implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch either from this
     *         or a another dispatcher; false once the scanner has
     *         reached the terminated state.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */
    public boolean dispatch(boolean complete)
        throws IOException, XNIException {

        try {
            boolean repeat;
            do {
                repeat = false;
                switch (fScannerState) {
                    case SCANNER_STATE_TRAILING_MISC: {
                        fEntityScanner.skipSpaces();
                        // either markup starts here or it is (illegal) content
                        final int nextState = fEntityScanner.skipChar('<')
                                            ? SCANNER_STATE_START_OF_MARKUP
                                            : SCANNER_STATE_CONTENT;
                        setScannerState(nextState);
                        repeat = true;
                        break;
                    }
                    case SCANNER_STATE_START_OF_MARKUP: {
                        fMarkupDepth++;
                        if (fEntityScanner.skipChar('?')) {
                            setScannerState(SCANNER_STATE_PI);
                            repeat = true;
                        }
                        else if (fEntityScanner.skipChar('!')) {
                            setScannerState(SCANNER_STATE_COMMENT);
                            repeat = true;
                        }
                        // REVISIT: Should a start tag be detected and
                        //          reported separately here?
                        else {
                            reportFatalError("MarkupNotRecognizedInMisc",
                                             null);
                        }
                        break;
                    }
                    case SCANNER_STATE_PI: {
                        scanPI();
                        setScannerState(SCANNER_STATE_TRAILING_MISC);
                        break;
                    }
                    case SCANNER_STATE_COMMENT: {
                        // "<!" has been consumed; a comment must continue with "--"
                        if (!fEntityScanner.skipString("--")) {
                            reportFatalError("InvalidCommentStart", null);
                        }
                        scanComment();
                        setScannerState(SCANNER_STATE_TRAILING_MISC);
                        break;
                    }
                    case SCANNER_STATE_CONTENT: {
                        // nothing left to read: the document is finished
                        if (fEntityScanner.peekChar() == -1) {
                            setScannerState(SCANNER_STATE_TERMINATED);
                            return false;
                        }
                        reportFatalError("ContentIllegalInTrailingMisc",
                                         null);
                        // skip the offending character and keep going
                        fEntityScanner.scanChar();
                        setScannerState(SCANNER_STATE_TRAILING_MISC);
                        break;
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInTrailingMisc",
                                         null);
                        setScannerState(SCANNER_STATE_TRAILING_MISC);
                        break;
                    }
                    case SCANNER_STATE_TERMINATED: {
                        return false;
                    }
                }
            } while (complete || repeat);
        }
        catch (EOFException e) {
            // NOTE: This is the only place we're allowed to reach
            //       the real end of the document stream. Unless the
            //       end of file was reached prematurely.
            if (fMarkupDepth == 0) {
                setScannerState(SCANNER_STATE_TERMINATED);
                return false;
            }
            reportFatalError("PrematureEOF", null);
            throw e;
        }

        return true;

    } // dispatch(boolean):boolean

} // class TrailingMiscDispatcher
} // class XMLDocumentScannerImpl