| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.pdf; |
| |
| import java.io.ByteArrayOutputStream; |
| |
| import java.util.Locale; |
| |
| import org.apache.fop.util.CharUtilities; |
| |
| /** |
| * This class represents a simple number object. It also contains contains some |
| * utility methods for outputting numbers to PDF. |
| */ |
| public class PDFText extends PDFObject { |
| |
| private static final char[] DIGITS |
| = {'0', '1', '2', '3', '4', '5', '6', '7', |
| '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; |
| |
| private String text; |
| |
| /** |
| * Returns the text. |
| * @return the text |
| */ |
| public String getText() { |
| return this.text; |
| } |
| |
| /** |
| * Sets the text. |
| * @param text the text |
| */ |
| public void setText(String text) { |
| this.text = text; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| protected String toPDFString() { |
| if (getText() == null) { |
| throw new IllegalArgumentException( |
| "The text of this PDFText must not be empty"); |
| } |
| StringBuffer sb = new StringBuffer(64); |
| sb.append("("); |
| sb.append(escapeText(getText())); |
| sb.append(")"); |
| return sb.toString(); |
| } |
| |
| /** |
| * Escape text (see 4.4.1 in PDF 1.3 specs) |
| * @param text the text to encode |
| * @return encoded text |
| */ |
| public static final String escapeText(final String text) { |
| return escapeText(text, false); |
| } |
| /** |
| * Escape text (see 4.4.1 in PDF 1.3 specs) |
| * @param text the text to encode |
| * @param forceHexMode true if the output should follow the hex encoding rules |
| * @return encoded text |
| */ |
| public static final String escapeText(final String text, boolean forceHexMode) { |
| if (text != null && text.length() > 0) { |
| boolean unicode = false; |
| boolean hexMode = false; |
| if (forceHexMode) { |
| hexMode = true; |
| } else { |
| for (int i = 0, c = text.length(); i < c; i++) { |
| if (text.charAt(i) >= 128) { |
| unicode = true; |
| hexMode = true; |
| break; |
| } |
| } |
| } |
| |
| if (hexMode) { |
| final byte[] uniBytes; |
| try { |
| uniBytes = text.getBytes("UTF-16"); |
| } catch (java.io.UnsupportedEncodingException uee) { |
| throw new RuntimeException("Incompatible VM", uee); |
| } |
| return toHex(uniBytes); |
| } else { |
| final StringBuffer result = new StringBuffer(text.length() * 2); |
| result.append("("); |
| final int l = text.length(); |
| |
| if (unicode) { |
| // byte order marker (0xfeff) |
| result.append("\\376\\377"); |
| |
| for (int i = 0; i < l; i++) { |
| final char ch = text.charAt(i); |
| final int high = (ch & 0xff00) >>> 8; |
| final int low = ch & 0xff; |
| result.append("\\"); |
| result.append(Integer.toOctalString(high)); |
| result.append("\\"); |
| result.append(Integer.toOctalString(low)); |
| } |
| } else { |
| for (int i = 0; i < l; i++) { |
| final char ch = text.charAt(i); |
| if (ch < 256) { |
| escapeStringChar(ch, result); |
| } else { |
| throw new IllegalStateException( |
| "Can only treat text in 8-bit ASCII/PDFEncoding"); |
| } |
| } |
| } |
| result.append(")"); |
| return result.toString(); |
| } |
| } |
| return "()"; |
| } |
| |
| /** |
| * Converts a byte array to a Hexadecimal String (3.2.3 in PDF 1.4 specs) |
| * @param data the data to encode |
| * @param brackets true if enclosing brackets should be included |
| * @return String the resulting string |
| */ |
| public static final String toHex(byte[] data, boolean brackets) { |
| final StringBuffer sb = new StringBuffer(data.length * 2); |
| if (brackets) { |
| sb.append("<"); |
| } |
| for (byte aData : data) { |
| sb.append(DIGITS[(aData >>> 4) & 0x0F]); |
| sb.append(DIGITS[aData & 0x0F]); |
| } |
| if (brackets) { |
| sb.append(">"); |
| } |
| return sb.toString(); |
| } |
| |
| /** |
| * Converts a byte array to a Hexadecimal String (3.2.3 in PDF 1.4 specs) |
| * @param data the data to encode |
| * @return String the resulting string |
| */ |
| public static final String toHex(byte[] data) { |
| return toHex(data, true); |
| } |
| |
| /** |
| * Converts a String to UTF-16 (big endian). |
| * @param text text to convert |
| * @return byte[] UTF-16 stream |
| */ |
| public static final byte[] toUTF16(String text) { |
| try { |
| return text.getBytes("UnicodeBig"); |
| } catch (java.io.UnsupportedEncodingException uee) { |
| throw new RuntimeException("Incompatible VM", uee); |
| } |
| } |
| |
| /** |
| * Convert a char to a multibyte hex representation |
| * @param c character to encode |
| * @return the encoded character |
| */ |
| public static final String toUnicodeHex(char c) { |
| final StringBuffer buf = new StringBuffer(4); |
| final byte[] uniBytes; |
| try { |
| final char[] a = {c}; |
| uniBytes = new String(a).getBytes("UTF-16BE"); |
| } catch (java.io.UnsupportedEncodingException uee) { |
| throw new RuntimeException("Incompatible VM", uee); |
| } |
| |
| for (byte uniByte : uniBytes) { |
| buf.append(DIGITS[(uniByte >>> 4) & 0x0F]); |
| buf.append(DIGITS[uniByte & 0x0F]); |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Convert a char to a multibyte hex representation appending to string buffer. |
| * The created string will be: |
| * <ul> |
| * <li>4-character string in case of non-BMP character</li> |
| * <li>6-character string in case of BMP character</li> |
| * </ul> |
| * @param c character to encode |
| * @param sb the string buffer to append output |
| */ |
| public static final void toUnicodeHex(int c, StringBuffer sb) { |
| if (CharUtilities.isBmpCodePoint(c)) { |
| sb.append(Integer.toHexString(c + 0x10000).substring(1).toUpperCase(Locale.US)); |
| } else { |
| sb.append(Integer.toHexString(c + 0x1000000).substring(1).toUpperCase(Locale.US)); |
| } |
| } |
| |
| /** |
| * Escaped a String as described in section 4.4 in the PDF 1.3 specs. |
| * @param s String to escape |
| * @return String the escaped String |
| */ |
| public static final String escapeString(final String s) { |
| if (s == null || s.length() == 0) { |
| return "()"; |
| } else { |
| final StringBuffer sb = new StringBuffer(64); |
| sb.append("("); |
| for (int i = 0; i < s.length(); i++) { |
| final char c = s.charAt(i); |
| escapeStringChar(c, sb); |
| } |
| sb.append(")"); |
| return sb.toString(); |
| } |
| } |
| |
| /** |
| * Escapes a character conforming to the rules established in the PostScript |
| * Language Reference (Search for "Literal Text Strings"). |
| * @param c character to escape |
| * @param target target StringBuffer to write the escaped character to |
| */ |
| public static final void escapeStringChar(final char c, final StringBuffer target) { |
| if (c > 127) { |
| target.append("\\"); |
| target.append(Integer.toOctalString(c)); |
| } else { |
| switch (c) { |
| case '\n': |
| target.append("\\n"); |
| break; |
| case '\r': |
| target.append("\\r"); |
| break; |
| case '\t': |
| target.append("\\t"); |
| break; |
| case '\b': |
| target.append("\\b"); |
| break; |
| case '\f': |
| target.append("\\f"); |
| break; |
| case '\\': |
| target.append("\\\\"); |
| break; |
| case '(': |
| target.append("\\("); |
| break; |
| case ')': |
| target.append("\\)"); |
| break; |
| default: |
| target.append(c); |
| } |
| } |
| } |
| |
| /** |
| * Escape a byte array for output to PDF (Used for encrypted strings) |
| * @param data data to encode |
| * @return byte[] encoded data |
| */ |
| public static final byte[] escapeByteArray(byte[] data) { |
| ByteArrayOutputStream bout = new ByteArrayOutputStream(data.length); |
| bout.write((int)'('); |
| for (final byte b : data) { |
| switch (b) { |
| case '\n': |
| bout.write('\\'); |
| bout.write('n'); |
| break; |
| case '\r': |
| bout.write('\\'); |
| bout.write('r'); |
| break; |
| case '\t': |
| bout.write('\\'); |
| bout.write('t'); |
| break; |
| case '\b': |
| bout.write('\\'); |
| bout.write('b'); |
| break; |
| case '\f': |
| bout.write('\\'); |
| bout.write('f'); |
| break; |
| case '\\': |
| bout.write('\\'); |
| bout.write('\\'); |
| break; |
| case '(': |
| bout.write('\\'); |
| bout.write('('); |
| break; |
| case ')': |
| bout.write('\\'); |
| bout.write(')'); |
| break; |
| default: |
| bout.write(b); |
| } |
| } |
| bout.write((int)')'); |
| return bout.toByteArray(); |
| } |
| |
| /** |
| * Converts a text to PDF's "string" data type. Unsupported characters get converted to '?' |
| * characters (similar to what the Java "US-ASCII" encoding does). |
| * @see #toPDFString(CharSequence, char) |
| * @param text the text to convert |
| * @return the converted string |
| */ |
| public static String toPDFString(CharSequence text) { |
| return toPDFString(text, '?'); |
| } |
| |
| /** |
| * Converts a text to PDF's "string" data type. Unsupported characters get converted to the |
| * given replacement character. |
| * <p> |
| * The PDF library currently doesn't properly distinguish between the PDF |
| * data types "string" and "text string", so we currently restrict "string" to US-ASCII, also |
| * because "string" seems somewhat under-specified concerning the upper 128 bytes. |
| * @param text the text to convert |
| * @param replacement the replacement character used when substituting a character |
| * @return the converted string |
| */ |
| public static String toPDFString(CharSequence text, char replacement) { |
| StringBuffer sb = new StringBuffer(); |
| for (int i = 0, c = text.length(); i < c; i++) { |
| char ch = text.charAt(i); |
| if (ch > 127) { |
| //TODO Revisit the restriction to US-ASCII once "string" and "text string" are |
| //"disentangled". |
| sb.append(replacement); |
| } else { |
| sb.append(ch); |
| } |
| } |
| return sb.toString(); |
| } |
| } |
| |