| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 1999 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache\@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation, and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.ibm.com . For more information |
| * on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| /** |
| * $Log$ |
| * Revision 1.2 1999/11/23 01:49:27 rahulj |
| * Cannot use class qualifier in class defn. CC under HPUX is happy. |
| * |
| * Revision 1.1.1.1 1999/11/09 01:08:37 twl |
| * Initial checkin |
| * |
| * Revision 1.2 1999/11/08 20:44:40 rahul |
| * Swat for adding in Product name and CVS comment log variable. |
| * |
| */ |
| |
| #if !defined(XMLRECOGNIZER_HPP) |
| #define XMLRECOGNIZER_HPP |
| |
| // |
| // This class provides some simple code to recognize the encodings of |
| // XML files. This recognition only does very basic sensing of the encoding |
| // in a broad sense. Basically its just enough to let us get started and |
| // read the XMLDecl line. The scanner, once it reads the XMLDecl, will |
| // tell the reader any actual encoding string it found and the reader can |
| // update itself to be more specific at that point. |
| // |
| class XMLPARSER_EXPORT XMLRecognizer |
| { |
| public : |
| // ----------------------------------------------------------------------- |
| // Class types |
| // |
| // This enum represents the various encoding families that we have to |
| // deal with individually at the scanner level. This does not indicate |
| // the exact encoding, just the rough family that would let us scan |
| // the XML/TextDecl to find the encoding string. |
| // |
| // The 'L's and 'B's stand for little or big endian. We conditionally |
| // create versions that will automatically map to the local UTF-16 and |
| // UCS-4 endian modes. |
| // |
| // OtherEncoding means that its some transcoder based encoding, i.e. not |
| // one of the ones that we do internally. Its a special case and should |
| // never be used directly outside of the reader. |
| // |
| // NOTE: Keep this in sync with the name map array in the Cpp file!! |
| // ----------------------------------------------------------------------- |
| enum Encodings |
| { |
| EBCDIC = 0 |
| , UCS_4B = 1 |
| , UCS_4L = 2 |
| , US_ASCII = 3 |
| , UTF_8 = 4 |
| , UTF_16B = 5 |
| , UTF_16L = 6 |
| |
| , Encodings_Count |
| , Encodings_Min = EBCDIC |
| , Encodings_Max = UTF_16L |
| |
| , OtherEncoding = 999 |
| |
| #if defined(ENDIANMODE_BIG) |
| , Def_UTF16 = UTF_16B |
| , Def_UCS4 = UCS_4B |
| #else |
| , Def_UTF16 = UTF_16L |
| , Def_UCS4 = UCS_4L |
| #endif |
| }; |
| |
| |
| // ----------------------------------------------------------------------- |
| // Encoding recognition methods |
| // ----------------------------------------------------------------------- |
| static Encodings basicEncodingProbe |
| ( |
| const XMLByte* const rawBuffer |
| , const unsigned int rawByteCount |
| ); |
| |
| static Encodings encodingForName |
| ( |
| const XMLCh* const theEncName |
| ); |
| |
| static const XMLCh* nameForEncoding(const Encodings theEncoding); |
| |
| |
| private : |
| // ----------------------------------------------------------------------- |
| // Unimplemented constructors, operators, and destructor |
| // |
| // This class is effectively being used as a namespace for some static |
| // methods. |
| // ----------------------------------------------------------------------- |
| XMLRecognizer(); |
| ~XMLRecognizer(); |
| void operator=(const XMLRecognizer&); |
| }; |
| |
| #endif |