/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| /* $Id$ */ |
| |
| package org.apache.fop.afp.fonts; |
| |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CodingErrorAction; |
| |
| /** |
| * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a |
| * specified format. |
| */ |
| public abstract class CharactersetEncoder { |
| |
| private final CharsetEncoder encoder; |
| |
| private CharactersetEncoder(String encoding) { |
| this.encoder = Charset.forName(encoding).newEncoder(); |
| this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| } |
| |
| /** |
| * Tells whether or not this encoder can encode the given character. |
| * |
| * @param c the character |
| * @return true if, and only if, this encoder can encode the given character |
| * @throws IllegalStateException - If an encoding operation is already in progress |
| */ |
| final boolean canEncode(char c) { |
| return encoder.canEncode(c); |
| } |
| |
| /** |
| * Encodes a character sequence to a byte array. |
| * |
| * @param chars the character sequence |
| * @return the encoded character sequence |
| * @throws CharacterCodingException if the encoding operation fails |
| */ |
| final EncodedChars encode(CharSequence chars) throws CharacterCodingException { |
| ByteBuffer bb; |
| // encode method is not thread safe |
| synchronized (encoder) { |
| bb = encoder.encode(CharBuffer.wrap(chars)); |
| } |
| if (bb.hasArray()) { |
| return getEncodedChars(bb.array(), bb.limit()); |
| } else { |
| bb.rewind(); |
| byte[] bytes = new byte[bb.remaining()]; |
| bb.get(bytes); |
| return getEncodedChars(bytes, bytes.length); |
| } |
| } |
| |
| abstract EncodedChars getEncodedChars(byte[] byteArray, int length); |
| |
| /** |
| * Encodes <code>chars</code> into a format specified by <code>encoding</code>. |
| * |
| * @param chars the character sequence |
| * @param encoding the encoding type |
| * @return encoded data |
| * @throws CharacterCodingException if encoding fails |
| */ |
| public static EncodedChars encodeSBCS(CharSequence chars, String encoding) |
| throws CharacterCodingException { |
| CharactersetEncoder encoder = CharacterSetType.SINGLE_BYTE.getEncoder(encoding); |
| return encoder.encode(chars); |
| } |
| |
| /** |
| * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character |
| * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character |
| * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators |
| * are removed from the sequence of bytes. These are only used in Line Data. |
| */ |
| static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder { |
| EbcdicDoubleByteLineDataEncoder(String encoding) { |
| super(encoding); |
| } |
| @Override |
| EncodedChars getEncodedChars(byte[] byteArray, int length) { |
| if (byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) { |
| return new EncodedChars(byteArray, 1, length - 2, true); |
| } |
| return new EncodedChars(byteArray, true); |
| } |
| } |
| |
| /** |
| * The default encoder is used for encoding IBM format SBCS (single byte character sets), this |
| * the primary format for most Latin character sets. This can also be used for Unicode double- |
| * byte character sets (DBCS). |
| */ |
| static final class DefaultEncoder extends CharactersetEncoder { |
| private final boolean isDBCS; |
| |
| DefaultEncoder(String encoding, boolean isDBCS) { |
| super(encoding); |
| this.isDBCS = isDBCS; |
| } |
| |
| @Override |
| EncodedChars getEncodedChars(byte[] byteArray, int length) { |
| return new EncodedChars(byteArray, isDBCS); |
| } |
| } |
| |
| /** |
| * A container for encoded character bytes |
| */ |
| // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked |
| public static class EncodedChars { |
| |
| private final byte[] bytes; |
| private final int offset; |
| private final int length; |
| private final boolean isDBCS; |
| |
| private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) { |
| if (offset < 0 || length < 0 || offset + length > bytes.length) { |
| throw new IllegalArgumentException(); |
| } |
| this.bytes = bytes; |
| this.offset = offset; |
| this.length = length; |
| this.isDBCS = isDBCS; |
| } |
| |
| private EncodedChars(byte[] bytes, boolean isDBCS) { |
| this(bytes, 0, bytes.length, isDBCS); |
| } |
| |
| /** |
| * write <code>length</code> bytes from <code>offset</code> to the output stream |
| * |
| * @param out output to write the bytes to |
| * @param offset the offset where to write |
| * @param length the length to write |
| * @throws IOException if an I/O error occurs |
| */ |
| public void writeTo(OutputStream out, int offset, int length) throws IOException { |
| if (offset < 0 || length < 0 || offset + length > bytes.length) { |
| throw new IllegalArgumentException(); |
| } |
| out.write(bytes, this.offset + offset, length); |
| } |
| |
| /** |
| * The number of containing bytes. |
| * |
| * @return the length |
| */ |
| public int getLength() { |
| return length; |
| } |
| |
| /** |
| * Indicates whether or not the EncodedChars object wraps double byte characters. |
| * |
| * @return true if the wrapped characters are double byte (DBCSs) |
| */ |
| public boolean isDBCS() { |
| return isDBCS; |
| } |
| |
| /** |
| * The bytes |
| * |
| * @return the bytes |
| */ |
| public byte[] getBytes() { |
| // return copy just in case |
| byte[] copy = new byte[bytes.length]; |
| System.arraycopy(bytes, 0, copy, 0, bytes.length); |
| return copy; |
| } |
| } |
| } |