blob: 8d9304d41fef4702415d3c243a0de31fa7ff17c9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xerces.util;
/**
* <p>A utility for generating efficient code for initializing
* the character flags array of org.apache.xerces.util.XML11Char.
* This class should be updated if the array changes, followed
* by regeneration of the code for XML11Char.</p>
*
* @author Michael Glavassevich, IBM
* @author Glenn Marcy, IBM
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author Neil Graham, IBM
*
* @version $Id$
*/
public class XML11CharGenerator {

    //
    // Constants
    //

    /**
     * Character flags for XML 1.1, indexed by character value.
     * The table has 1 &lt;&lt; 16 entries, one per value in 0x0000-0xFFFF.
     */
    private static final byte[] XML11CHARS = new byte[1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask. */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars). */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart. */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName. */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars). */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        /*
         * XML 1.1 initialization.
         *
         * Range tables below are flat lists of inclusive (first, last) pairs.
         * Only values up to #xFFFF are listed, because the flag table has
         * 1 << 16 entries; the supplementary ranges quoted in the grammar
         * comments are therefore not represented here.
         */

        // [2]: Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //
        // NOTE: This range is Char - (RestrictedChar | S | #x85 | #x2028).
        final int[] nonWhitespaceRanges = {
            0x21, 0x7E,
            0xA0, 0x2027,
            0x2029, 0xD7FF,
            0xE000, 0xFFFD,
        };

        // NOTE: this does *NOT* correspond to the S production
        // from XML 1.0. Rather, it corresponds to S plus the chars that
        // are involved in whitespace normalization. It's handy in a few
        // places in the entity scanner where we need to detect the
        // presence of all characters to be considered whitespace.
        final int[] whitespaceChars = {
            0x9, 0xA, 0xD, 0x20, 0x85, 0x2028,
        };

        // [2a]: RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] |
        //                          [#x7F-#x84] | [#x86-#x9F]
        final int[] controlRanges = {
            0x1, 0x8,
            0xB, 0xC,
            0xE, 0x1F,
            0x7f, 0x84,
            0x86, 0x9f,
        };

        // [4]: NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
        //                        [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
        //                        [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] |
        //                        [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
        //                        [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
        final int[] nameStartRanges = {
            ':', ':',
            'A', 'Z',
            '_', '_',
            'a', 'z',
            0xC0, 0xD6,
            0xD8, 0xF6,
            0xF8, 0x2FF,
            0x370, 0x37D,
            0x37F, 0x1FFF,
            0x200C, 0x200D,
            0x2070, 0x218F,
            0x2C00, 0x2FEF,
            0x3001, 0xD7FF,
            0xF900, 0xFDCF,
            0xFDF0, 0xFFFD,
        };

        // [4a]: NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
        //                    [#x0300-#x036F] | [#x203F-#x2040]
        final int[] nameRanges = {
            '-', '-',
            '.', '.',
            '0', '9',
            0xB7, 0xB7,
            0x0300, 0x036F,
            0x203F, 0x2040,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //
        final int[] specialChars = {
            '<', '&', '\n', '\r', ']',
        };

        // Populate XML11CHARS. Every flag is OR-ed in first; the two
        // flag-clearing steps at the end must run after all of the
        // additions because they strip bits set by earlier steps.
        addFlagsToRanges(nonWhitespaceRanges, MASK_XML11_VALID | MASK_XML11_CONTENT);
        addFlagsToChars(whitespaceChars,
                MASK_XML11_VALID | MASK_XML11_SPACE | MASK_XML11_CONTENT);
        addFlagsToRanges(controlRanges, MASK_XML11_VALID | MASK_XML11_CONTROL);
        addFlagsToRanges(nameStartRanges,
                MASK_XML11_NAME_START | MASK_XML11_NAME
                        | MASK_XML11_NCNAME_START | MASK_XML11_NCNAME);
        addFlagsToRanges(nameRanges, MASK_XML11_NAME | MASK_XML11_NCNAME);

        // ':' was flagged as a name-start char above, but it is not
        // allowed in an NCName, so drop both NCName bits for it.
        removeFlags(':', MASK_XML11_NCNAME_START | MASK_XML11_NCNAME);

        // The special chars lose the content bit.
        for (int special : specialChars) {
            removeFlags(special, MASK_XML11_CONTENT);
        }

    } // <clinit>()

    /**
     * ORs the given flags into every table entry covered by a list of
     * inclusive ranges.
     *
     * @param bounds flat list of (first, last) pairs
     * @param flags  bit mask to set on each covered entry
     */
    private static void addFlagsToRanges(int[] bounds, int flags) {
        for (int pair = 0; pair < bounds.length; pair += 2) {
            final int first = bounds[pair];
            final int last = bounds[pair + 1];
            for (int ch = first; ch <= last; ch++) {
                // compound assignment narrows the int result back to byte
                XML11CHARS[ch] |= flags;
            }
        }
    }

    /**
     * ORs the given flags into the table entry of each listed character.
     *
     * @param chars character values to flag
     * @param flags bit mask to set on each entry
     */
    private static void addFlagsToChars(int[] chars, int flags) {
        for (int ch : chars) {
            XML11CHARS[ch] |= flags;
        }
    }

    /**
     * Clears the given flags from the table entry of a single character.
     *
     * @param ch    character value whose entry is modified
     * @param flags bit mask to clear
     */
    private static void removeFlags(int ch, int flags) {
        XML11CHARS[ch] &= ~flags;
    }

    /**
     * Writes the code for initializing the character flags
     * array for XML11Char to System.out.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args) {
        ArrayFillingCodeGenerator.generateByteArray("XML11CHARS", XML11CHARS, System.out);
    }
}