| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.pdf; |
| |
| import java.io.IOException; |
| import java.io.Writer; |
| |
| /** |
| * Class representing ToUnicode CMaps. |
| * Here are some documentation resources: |
| * <ul> |
| * <li>PDF Reference, Second Edition, Section 5.6.4, for general information |
| * about CMaps in PDF Files.</li> |
| * <li>PDF Reference, Second Edition, Section 5.9, for specific information |
| * about ToUnicodeCMaps in PDF Files.</li> |
| * <li> |
| * <a href="http://partners.adobe.com/asn/developer/pdfs/tn/5411.ToUnicode.pdf"> |
| * Adobe Technical Note #5411, "ToUnicode Mapping File Tutorial"</a>. |
| * </ul> |
| */ |
| public class PDFToUnicodeCMap extends PDFCMap { |
| |
| /** |
| * The array of Unicode characters ordered by character code |
| * (maps from character code to Unicode code point). |
| */ |
| protected char[] unicodeCharMap; |
| |
| private boolean singleByte; |
| |
| /** |
| * Constructor. |
| * |
| * @param unicodeCharMap An array of Unicode characters ordered by character code |
| * (maps from character code to Unicode code point) |
| * @param name One of the registered names found in Table 5.14 in PDF |
| * Reference, Second Edition. |
| * @param sysInfo The attributes of the character collection of the CIDFont. |
| * @param singleByte true for single-byte, false for double-byte |
| */ |
| public PDFToUnicodeCMap(char[] unicodeCharMap, String name, PDFCIDSystemInfo sysInfo, |
| boolean singleByte) { |
| super(name, sysInfo); |
| if (singleByte && unicodeCharMap.length > 256) { |
| throw new IllegalArgumentException("unicodeCharMap may not contain more than" |
| + " 256 characters for single-byte encodings"); |
| } |
| this.unicodeCharMap = unicodeCharMap; |
| this.singleByte = singleByte; |
| } |
| |
| /** {@inheritDoc} */ |
| protected CMapBuilder createCMapBuilder(Writer writer) { |
| return new ToUnicodeCMapBuilder(writer); |
| } |
| |
| class ToUnicodeCMapBuilder extends CMapBuilder { |
| |
| public ToUnicodeCMapBuilder(Writer writer) { |
| super(writer, null); |
| } |
| |
| /** |
| * Writes the CMap to a Writer. |
| * @param writer the writer |
| * @throws IOException if an I/O error occurs |
| */ |
| public void writeCMap() throws IOException { |
| writeCIDInit(); |
| writeCIDSystemInfo("Adobe", "UCS", 0); |
| writeName("Adobe-Identity-UCS"); |
| writeType("2"); |
| writeCodeSpaceRange(singleByte); |
| writeBFEntries(); |
| writeWrapUp(); |
| } |
| |
| /** |
| * Writes the character mappings for this font. |
| * @param p StingBuffer to write to |
| */ |
| protected void writeBFEntries() throws IOException { |
| if (unicodeCharMap != null) { |
| writeBFCharEntries(unicodeCharMap); |
| writeBFRangeEntries(unicodeCharMap); |
| } |
| } |
| |
| /** |
| * Writes the entries for single characters of a base font (only characters which cannot be |
| * expressed as part of a character range). |
| * @param p StringBuffer to write to |
| * @param charArray all the characters to map |
| * @throws IOException |
| */ |
| protected void writeBFCharEntries(char[] charArray) throws IOException { |
| int totalEntries = 0; |
| for (int i = 0; i < charArray.length; i++) { |
| if (!partOfRange(charArray, i)) { |
| totalEntries++; |
| } |
| } |
| if (totalEntries < 1) { |
| return; |
| } |
| int remainingEntries = totalEntries; |
| int charIndex = 0; |
| do { |
| /* Limited to 100 entries in each section */ |
| int entriesThisSection = Math.min(remainingEntries, 100); |
| writer.write(entriesThisSection + " beginbfchar\n"); |
| for (int i = 0; i < entriesThisSection; i++) { |
| /* Go to the next char not in a range */ |
| while (partOfRange(charArray, charIndex)) { |
| charIndex++; |
| } |
| writer.write("<" + padCharIndex(charIndex) + "> "); |
| writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4) |
| + ">\n"); |
| charIndex++; |
| } |
| remainingEntries -= entriesThisSection; |
| writer.write("endbfchar\n"); |
| } while (remainingEntries > 0); |
| } |
| |
| private String padCharIndex(int charIndex) { |
| return padHexString(Integer.toHexString(charIndex), (singleByte ? 2 : 4)); |
| } |
| |
| /** |
| * Writes the entries for character ranges for a base font. |
| * @param p StringBuffer to write to |
| * @param charArray all the characters to map |
| * @throws IOException |
| */ |
| protected void writeBFRangeEntries(char[] charArray) throws IOException { |
| int totalEntries = 0; |
| for (int i = 0; i < charArray.length; i++) { |
| if (startOfRange(charArray, i)) { |
| totalEntries++; |
| } |
| } |
| if (totalEntries < 1) { |
| return; |
| } |
| int remainingEntries = totalEntries; |
| int charIndex = 0; |
| do { |
| /* Limited to 100 entries in each section */ |
| int entriesThisSection = Math.min(remainingEntries, 100); |
| writer.write(entriesThisSection + " beginbfrange\n"); |
| for (int i = 0; i < entriesThisSection; i++) { |
| /* Go to the next start of a range */ |
| while (!startOfRange(charArray, charIndex)) { |
| charIndex++; |
| } |
| writer.write("<" + padCharIndex(charIndex) + "> "); |
| writer.write("<" |
| + padCharIndex(endOfRange(charArray, charIndex)) |
| + "> "); |
| writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4) |
| + ">\n"); |
| charIndex++; |
| } |
| remainingEntries -= entriesThisSection; |
| writer.write("endbfrange\n"); |
| } while (remainingEntries > 0); |
| } |
| |
| /** |
| * Find the end of the current range. |
| * @param charArray The array which is being tested. |
| * @param startOfRange The index to the array element that is the start of |
| * the range. |
| * @return The index to the element that is the end of the range. |
| */ |
| private int endOfRange(char[] charArray, int startOfRange) { |
| int i = startOfRange; |
| while (i < charArray.length - 1 && sameRangeEntryAsNext(charArray, i)) { |
| i++; |
| } |
| return i; |
| } |
| |
| /** |
| * Determine whether this array element should be part of a bfchar entry or |
| * a bfrange entry. |
| * @param charArray The array to be tested. |
| * @param arrayIndex The index to the array element to be tested. |
| * @return True if this array element should be included in a range. |
| */ |
| private boolean partOfRange(char[] charArray, int arrayIndex) { |
| if (charArray.length < 2) { |
| return false; |
| } |
| if (arrayIndex == 0) { |
| return sameRangeEntryAsNext(charArray, 0); |
| } |
| if (arrayIndex == charArray.length - 1) { |
| return sameRangeEntryAsNext(charArray, arrayIndex - 1); |
| } |
| if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) { |
| return true; |
| } |
| if (sameRangeEntryAsNext(charArray, arrayIndex)) { |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Determine whether two bytes can be written in the same bfrange entry. |
| * @param charArray The array to be tested. |
| * @param firstItem The first of the two items in the array to be tested. |
| * The second item is firstItem + 1. |
| * @return True if both 1) the next item in the array is sequential with |
| * this one, and 2) the first byte of the character in the first position |
| * is equal to the first byte of the character in the second position. |
| */ |
| private boolean sameRangeEntryAsNext(char[] charArray, int firstItem) { |
| if (charArray[firstItem] + 1 != charArray[firstItem + 1]) { |
| return false; |
| } |
| if (firstItem / 256 != (firstItem + 1) / 256) { |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Determine whether this array element should be the start of a bfrange |
| * entry. |
| * @param charArray The array to be tested. |
| * @param arrayIndex The index to the array element to be tested. |
| * @return True if this array element is the beginning of a range. |
| */ |
| private boolean startOfRange(char[] charArray, int arrayIndex) { |
| // Can't be the start of a range if not part of a range. |
| if (!partOfRange(charArray, arrayIndex)) { |
| return false; |
| } |
| // If first element in the array, must be start of a range |
| if (arrayIndex == 0) { |
| return true; |
| } |
| // If last element in the array, cannot be start of a range |
| if (arrayIndex == charArray.length - 1) { |
| return false; |
| } |
| /* |
| * If part of same range as the previous element is, cannot be start |
| * of range. |
| */ |
| if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) { |
| return false; |
| } |
| // Otherwise, this is start of a range. |
| return true; |
| } |
| |
| /** |
| * Prepends the input string with a sufficient number of "0" characters to |
| * get the returned string to be numChars length. |
| * @param input The input string. |
| * @param numChars The minimum characters in the output string. |
| * @return The padded string. |
| */ |
| private String padHexString(String input, int numChars) { |
| int length = input.length(); |
| if (length >= numChars) { |
| return input; |
| } |
| StringBuffer returnString = new StringBuffer(); |
| for (int i = 1; i <= numChars - length; i++) { |
| returnString.append("0"); |
| } |
| returnString.append(input); |
| return returnString.toString(); |
| } |
| |
| } |
| |
| } |