| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * |
| * Copyright (c) 1999-2003 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xalan" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation and was |
| * originally based on software copyright (c) 1999, Lotus |
| * Development Corporation., http://www.lotus.com. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| package org.apache.xalan.serialize; |
| |
| import java.util.BitSet; |
| |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.BufferedReader; |
| |
| import java.net.*; |
| |
| import java.util.Hashtable; |
| |
| import org.apache.xml.utils.CharKey; |
| |
| import org.apache.xalan.res.XSLMessages; |
| import org.apache.xalan.res.XSLTErrorResources; |
| |
| /** |
| * This class provides services that tell if a character should have |
| * special treatement, such as entity reference substitution or normalization |
| * of a newline character. It also provides character to entity reference |
| * lookup. |
| * |
| * DEVELOPERS: See Known Issue in the constructor. |
| */ |
| public class CharInfo |
| { |
| |
| /** Bit map that tells if a given character should have special treatment. */ |
| BitSet m_specialsMap = new BitSet(65535); |
| |
| /** Lookup table for characters to entity references. */ |
| private Hashtable m_charToEntityRef = new Hashtable(); |
| |
| /** |
| * The name of the HTML entities file. |
| * If specified, the file will be resource loaded with the default class loader. |
| */ |
| public static String HTML_ENTITIES_RESOURCE = "HTMLEntities.res"; |
| |
| /** |
| * The name of the XML entities file. |
| * If specified, the file will be resource loaded with the default class loader. |
| */ |
| public static String XML_ENTITIES_RESOURCE = "XMLEntities.res"; |
| |
| /** The linefeed character, which the parser should always normalize. */ |
| public static char S_LINEFEED = 0x0A; |
| |
| /** The carriage return character, which the parser should always normalize. */ |
| public static char S_CARRIAGERETURN = 0x0D; |
| |
| /** a zero length Class array used in the constructor */ |
| private static final Class[] NO_CLASSES = new Class[0]; |
| |
| /** a zero length Object array used in the constructor */ |
| private static final Object[] NO_OBJS = new Object[0]; |
| |
| |
| /** |
| * Constructor that reads in a resource file that describes the mapping of |
| * characters to entity references. |
| * |
| * Resource files must be encoded in UTF-8 and have a format like: |
| * <pre> |
| * # First char # is a comment |
| * Entity numericValue |
| * quot 34 |
| * amp 38 |
| * </pre> |
| * (Note: Why don't we just switch to .properties files? Oct-01 -sc) |
| * |
| * @param entitiesResource Name of entities resource file that should |
| * be loaded, which describes that mapping of characters to entity references. |
| */ |
| public CharInfo(String entitiesResource) |
| { |
| |
| InputStream is = null; |
| BufferedReader reader = null; |
| int index; |
| String name; |
| String value; |
| int code; |
| String line; |
| |
| try |
| { |
| try |
| { |
| // Maintenance note: we should evaluate replacing getting the |
| // ClassLoader with javax.xml.transform.FactoryFinder.findClassLoader() |
| // or similar code |
| ClassLoader cl = CharInfo.class.getClassLoader(); |
| |
| if (cl == null) { |
| is = ClassLoader.getSystemResourceAsStream(entitiesResource); |
| } else { |
| is = cl.getResourceAsStream(entitiesResource); |
| } |
| } |
| catch (Exception e) {} |
| |
| if (is == null) |
| is = CharInfo.class.getResourceAsStream(entitiesResource); |
| |
| if (is == null) |
| { |
| URL url = new URL(entitiesResource); |
| |
| is = url.openStream(); |
| } |
| |
| if (is == null) |
| throw new RuntimeException(XSLMessages.createMessage(XSLTErrorResources.ER_RESOURCE_COULD_NOT_FIND, new Object[]{entitiesResource, entitiesResource })); |
| |
| // Fix Bugzilla#4000: force reading in UTF-8 |
| // This creates the de facto standard that Xalan's resource |
| // files must be encoded in UTF-8. This should work in all JVMs. |
| // |
| // %REVIEW% KNOWN ISSUE: IT FAILS IN MICROSOFT VJ++, which |
| // didn't implement the UTF-8 encoding. Theoretically, we should |
| // simply let it fail in that case, since the JVM is obviously |
| // broken if it doesn't support such a basic standard. But |
| // since there are still some users attempting to use VJ++ for |
| // development, we have dropped in a fallback which makes a |
| // second attempt using the platform's default encoding. In VJ++ |
| // this is apparently ASCII, which is subset of UTF-8... and |
| // since the strings we'll be reading here are also primarily |
| // limited to the 7-bit ASCII range (at least, in English |
| // versions of Xalan), this should work well enough to keep us |
| // on the air until we're ready to officially decommit from |
| // VJ++. |
| try |
| { |
| reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); |
| } |
| catch(java.io.UnsupportedEncodingException e) |
| { |
| reader = new BufferedReader(new InputStreamReader(is)); |
| } |
| |
| line = reader.readLine(); |
| |
| while (line != null) |
| { |
| if (line.length() == 0 || line.charAt(0) == '#') |
| { |
| line = reader.readLine(); |
| |
| continue; |
| } |
| |
| index = line.indexOf(' '); |
| |
| if (index > 1) |
| { |
| name = line.substring(0, index); |
| |
| ++index; |
| |
| if (index < line.length()) |
| { |
| value = line.substring(index); |
| index = value.indexOf(' '); |
| |
| if (index > 0) |
| value = value.substring(0, index); |
| |
| code = Integer.parseInt(value); |
| |
| defineEntity(name, (char) code); |
| } |
| } |
| |
| line = reader.readLine(); |
| } |
| |
| is.close(); |
| m_specialsMap.set(S_LINEFEED); |
| m_specialsMap.set(S_CARRIAGERETURN); |
| } |
| catch (Exception except) |
| { |
| throw new RuntimeException(XSLMessages.createMessage(XSLTErrorResources.ER_RESOURCE_COULD_NOT_LOAD, new Object[]{entitiesResource, except.toString(), entitiesResource, except.toString() })); |
| } |
| finally |
| { |
| if (is != null) |
| { |
| try |
| { |
| is.close(); |
| } |
| catch (Exception except){} |
| } |
| } |
| } |
| |
| /** |
| * Defines a new character reference. The reference's name and value are |
| * supplied. Nothing happens if the character reference is already defined. |
| * <p>Unlike internal entities, character references are a string to single |
| * character mapping. They are used to map non-ASCII characters both on |
| * parsing and printing, primarily for HTML documents. '<amp;' is an |
| * example of a character reference.</p> |
| * |
| * @param name The entity's name |
| * @param value The entity's value |
| */ |
| protected void defineEntity(String name, char value) |
| { |
| CharKey character = new CharKey(value); |
| |
| m_charToEntityRef.put(character, name); |
| m_specialsMap.set(value); |
| } |
| |
| private CharKey m_charKey = new CharKey(); |
| |
| /** |
| * Resolve a character to an entity reference name. |
| * |
| * This is reusing a stored key object, in an effort to avoid |
| * heap activity. Unfortunately, that introduces a threading risk. |
| * Simplest fix for now is to make it a synchronized method, or to give |
| * up the reuse; I see very little performance difference between them. |
| * Long-term solution would be to replace the hashtable with a sparse array |
| * keyed directly from the character's integer value; see DTM's |
| * string pool for a related solution. |
| * |
| * @param value character value that should be resolved to a name. |
| * |
| * @return name of character entity, or null if not found. |
| */ |
| synchronized |
| public String getEntityNameForChar(char value) |
| { |
| // CharKey m_charKey = new CharKey(); //Alternative to synchronized |
| m_charKey.setChar(value); |
| return (String) m_charToEntityRef.get(m_charKey); |
| } |
| |
| /** |
| * Tell if the character argument should have special treatment. |
| * |
| * @param value character value. |
| * |
| * @return true if the character should have any special treatment. |
| */ |
| public boolean isSpecial(char value) |
| { |
| return m_specialsMap.get(value); |
| } |
| } |