/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cocoon.components.serializers.encoding;

| /** |
| * |
| * |
| * @author <a href="mailto:pier@apache.org">Pier Fumagalli</a>, February 2003 |
| * @version CVS $Id$ |
| */ |
| public abstract class CompiledCharset extends AbstractCharset { |
| |
| /** The encoding table of this <code>Charset</code>. */ |
| protected byte encoding[]; |
| |
| /** |
| * Create a new instance of this <code>CompiledCharset</code>. |
| * <p> |
| * After construction, the <code>compile()</code> method will have to |
| * be called for proper operation of this <code>Charset</code>. |
| * |
| * @param name This <code>Charset</code> name. |
| * @param aliases This <code>Charset</code> alias names. |
| * @throws NullPointerException If one of the arguments is <b>null</b>. |
| */ |
| protected CompiledCharset(String name, String aliases[]) { |
| super(name, aliases); |
| this.encoding = new byte[8192]; |
| for (int x = 0; x < this.encoding.length; x++) this.encoding[x] = 0; |
| } |
| |
| /** |
| * Create a new instance of this <code>CompiledCharset</code>. |
| * <p> |
| * The encodings table passed to this constructor <b>needs</b> to be 8192 |
| * bytes long, or (in other words), must contain exactly 65536 bits. |
| * </p> |
| * <p> |
| * As in the Java Programming Language a <code>char</code> can assume |
| * values between 0 (zero) and 65535 (inclusive), each bit in the specified |
| * array refers to a specific <code>char</code> value. |
| * </p> |
| * <p> |
| * When this specific bit is set to 1 (one or true) we assume that the |
| * charset <b>can</b> encode the given character, while when the bit is |
| * set to 0 (zero or false), the character cannot be represented using |
| * this <code>Charset</code>. |
| * </p> |
| * <p> |
| * For example, the <b>US-ASCII</b> <code>Charset</code> can represent |
| * only Java characters between 0 (zero) and 255 (inclusive), therefore |
| * the specified byte array will contain only 256 true bits. |
| * </p> |
| * <p> |
| * To check if a character can be encoded by this <code>Charset</code>, |
| * given "<code>c</code>" as the character to verify, one |
| * can write this simple formula: |
| * </p> |
| * <p> |
| * <nobr><code>((encoding[c >> 3] & (1 << (c & 0x07))) > 0) |
| * </p> |
| * <p> |
| * If the result of this operation is 0 (zero) the bit was set to zero, |
| * and therefore "<code>c</code>" cannot be represented in |
| * this <code>Charset</code>, while if the result is greater than 0 (zero) |
| * the character "<code>c</code>" can actually be represented |
| * by this <code>Charset</code> |
| * </p> |
| * |
| * @param name This <code>Charset</code> name. |
| * @param aliases This <code>Charset</code> alias names. |
| * @param encoding This <code>Charset</code> encoding table as specified |
| * above. |
| * @throws NullPointerException If one of the arguments is <b>null</b>. |
| * @throws IllegalArgumentException If the length of the encoding table |
| * is <b>not</b> 8192 precisely. |
| */ |
| protected CompiledCharset(String name, String aliases[], byte encoding[]) |
| throws NullPointerException, IllegalArgumentException { |
| super(name, aliases); |
| if (encoding == null) throw new NullPointerException("Invalid table"); |
| if (encoding.length != 8192) { |
| throw new IllegalArgumentException("Invalid encoding table size: " |
| + "current length is " + encoding.length + ", required 8192."); |
| } |
| this.encoding = encoding; |
| } |
| |
| /** |
| * Check if the specified character is representable by this specifiec |
| * <code>Charset</code> instance. |
| * </p> |
| */ |
| public boolean allows(char c) { |
| /* This is tied to haw the compiler does stuff. */ |
| return((this.encoding[c >> 3] & (1 << (c & 0x07))) > 0); |
| } |
| |
| /** |
| * Compile the encoding table of this <code>CompiledCharset</code>. |
| * <p> |
| * This method will invoke the <code>compile(...)</code> method for any |
| * possible value of a Java character (65536 times, from 0, zero, to |
| * 65535 inclusive), building the encoding table of the characters this |
| * <code>Charset</code> can successfully represent. |
| */ |
| protected final void compile() { |
| for (int x = 0; x <= Character.MAX_VALUE; x ++) { |
| if (this.compile((char)x)) { |
| int pos = x >> 3; |
| encoding[pos] = (byte) (encoding[pos] | (1 << (x & 0x07))); |
| } |
| } |
| } |
| |
| /** |
| * Return true or false wether this encoding can encode the specified |
| * character or not. |
| * <p> |
| * This method is equivalent to the <code>allows(...)</code> method, but |
| * it will be called upon construction of the encoding table. |
| * </p> |
| */ |
| protected abstract boolean compile(char c); |
| } |