blob: 95999b73f3093f4701d6f0816cd50deb8c3dece9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.pdf;
import java.io.IOException;
import java.io.Writer;
/**
 * Class representing ToUnicode CMaps.
 * Here are some documentation resources:
 * <ul>
 * <li>PDF Reference, Second Edition, Section 5.6.4, for general information
 * about CMaps in PDF Files.</li>
 * <li>PDF Reference, Second Edition, Section 5.9, for specific information
 * about ToUnicodeCMaps in PDF Files.</li>
 * <li>
 * <a href="http://partners.adobe.com/asn/developer/pdfs/tn/5411.ToUnicode.pdf">
 * Adobe Technical Note #5411, "ToUnicode Mapping File Tutorial"</a>.</li>
 * </ul>
 */
public class PDFToUnicodeCMap extends PDFCMap {

    /** Maximum number of entries written into one bfchar/bfrange section. */
    private static final int MAX_ENTRIES_PER_SECTION = 100;

    /**
     * The array of Unicode characters ordered by character code
     * (maps from character code to Unicode code point).
     * May be <code>null</code>, in which case no mapping entries are written.
     */
    protected char[] unicodeCharMap;

    /** True if character codes are written as one byte (2 hex digits), false for two bytes. */
    private final boolean singleByte;

    /**
     * Constructor.
     *
     * @param unicodeCharMap An array of Unicode characters ordered by character code
     * (maps from character code to Unicode code point); may be <code>null</code>
     * @param name One of the registered names found in Table 5.14 in PDF
     * Reference, Second Edition.
     * @param sysInfo The attributes of the character collection of the CIDFont.
     * @param singleByte true for single-byte, false for double-byte
     * @throws IllegalArgumentException if <code>singleByte</code> is true and the map
     * contains more than 256 characters
     */
    public PDFToUnicodeCMap(char[] unicodeCharMap, String name, PDFCIDSystemInfo sysInfo,
            boolean singleByte) {
        super(name, sysInfo);
        // A null map is tolerated here because writeBFEntries() tolerates it as well.
        if (singleByte && unicodeCharMap != null && unicodeCharMap.length > 256) {
            throw new IllegalArgumentException("unicodeCharMap may not contain more than"
                    + " 256 characters for single-byte encodings");
        }
        this.unicodeCharMap = unicodeCharMap;
        this.singleByte = singleByte;
    }

    /** {@inheritDoc} */
    protected CMapBuilder createCMapBuilder(Writer writer) {
        return new ToUnicodeCMapBuilder(writer);
    }

    /**
     * CMap builder that emits the bfchar/bfrange sections for the enclosing
     * instance's {@link PDFToUnicodeCMap#unicodeCharMap}.
     */
    class ToUnicodeCMapBuilder extends CMapBuilder {

        /**
         * Creates a builder writing to the given writer.
         * @param writer the writer to write the CMap to
         */
        public ToUnicodeCMapBuilder(Writer writer) {
            // NOTE(review): meaning of the second super argument is defined by CMapBuilder,
            // which is not visible here; null is passed exactly as before.
            super(writer, null);
        }

        /**
         * Writes the CMap to the builder's writer. The header sections are produced by
         * helper methods that are not defined in this class (presumably inherited from
         * CMapBuilder); they are called with the fixed values "Adobe"/"UCS"/0,
         * "Adobe-Identity-UCS" and type "2", followed by the code space range and the
         * character mappings.
         * @throws IOException if an I/O error occurs
         */
        public void writeCMap() throws IOException {
            writeCIDInit();
            writeCIDSystemInfo("Adobe", "UCS", 0);
            writeName("Adobe-Identity-UCS");
            writeType("2");
            writeCodeSpaceRange(singleByte);
            writeBFEntries();
            writeWrapUp();
        }

        /**
         * Writes the character mappings for this font: first the single-character
         * (bfchar) entries, then the range (bfrange) entries. Nothing is written if
         * the enclosing instance has no character map.
         * @throws IOException if an I/O error occurs
         */
        protected void writeBFEntries() throws IOException {
            if (unicodeCharMap != null) {
                writeBFCharEntries(unicodeCharMap);
                writeBFRangeEntries(unicodeCharMap);
            }
        }

        /**
         * Writes the entries for single characters of a base font (only characters which cannot be
         * expressed as part of a character range). Entries are split into sections of at most
         * {@value PDFToUnicodeCMap#MAX_ENTRIES_PER_SECTION} entries each.
         * @param charArray all the characters to map
         * @throws IOException if an I/O error occurs
         */
        protected void writeBFCharEntries(char[] charArray) throws IOException {
            // First pass: count the entries so that each section header can state its size.
            int totalEntries = 0;
            for (int i = 0; i < charArray.length; i++) {
                if (!partOfRange(charArray, i)) {
                    totalEntries++;
                }
            }
            int remainingEntries = totalEntries;
            int charIndex = 0;
            while (remainingEntries > 0) {
                int entriesThisSection = Math.min(remainingEntries, MAX_ENTRIES_PER_SECTION);
                writer.write(entriesThisSection + " beginbfchar\n");
                for (int i = 0; i < entriesThisSection; i++) {
                    // Skip forward to the next char that is not in a range. The count above
                    // guarantees that such an element exists, so this cannot run off the end.
                    while (partOfRange(charArray, charIndex)) {
                        charIndex++;
                    }
                    writer.write("<" + padCharIndex(charIndex) + "> ");
                    writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
                            + ">\n");
                    charIndex++;
                }
                remainingEntries -= entriesThisSection;
                writer.write("endbfchar\n");
            }
        }

        /**
         * Formats a character code as hex, zero-padded to 2 digits for single-byte
         * encodings and 4 digits otherwise.
         * @param charIndex the character code (index into the character map)
         * @return the padded hex string
         */
        private String padCharIndex(int charIndex) {
            return padHexString(Integer.toHexString(charIndex), (singleByte ? 2 : 4));
        }

        /**
         * Writes the entries for character ranges for a base font. Entries are split into
         * sections of at most {@value PDFToUnicodeCMap#MAX_ENTRIES_PER_SECTION} entries each.
         * @param charArray all the characters to map
         * @throws IOException if an I/O error occurs
         */
        protected void writeBFRangeEntries(char[] charArray) throws IOException {
            // First pass: count the ranges so that each section header can state its size.
            int totalEntries = 0;
            for (int i = 0; i < charArray.length; i++) {
                if (startOfRange(charArray, i)) {
                    totalEntries++;
                }
            }
            int remainingEntries = totalEntries;
            int charIndex = 0;
            while (remainingEntries > 0) {
                int entriesThisSection = Math.min(remainingEntries, MAX_ENTRIES_PER_SECTION);
                writer.write(entriesThisSection + " beginbfrange\n");
                for (int i = 0; i < entriesThisSection; i++) {
                    // Skip forward to the next start of a range. The count above guarantees
                    // that such an element exists, so this cannot run off the end.
                    while (!startOfRange(charArray, charIndex)) {
                        charIndex++;
                    }
                    // <first code> <last code> <Unicode value of first code>
                    writer.write("<" + padCharIndex(charIndex) + "> ");
                    writer.write("<"
                            + padCharIndex(endOfRange(charArray, charIndex))
                            + "> ");
                    writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
                            + ">\n");
                    charIndex++;
                }
                remainingEntries -= entriesThisSection;
                writer.write("endbfrange\n");
            }
        }

        /**
         * Find the end of the current range.
         * @param charArray The array which is being tested.
         * @param startOfRange The index to the array element that is the start of
         * the range.
         * @return The index to the element that is the end of the range.
         */
        private int endOfRange(char[] charArray, int startOfRange) {
            int i = startOfRange;
            while (i < charArray.length - 1 && sameRangeEntryAsNext(charArray, i)) {
                i++;
            }
            return i;
        }

        /**
         * Determine whether this array element should be part of a bfchar entry or
         * a bfrange entry.
         * @param charArray The array to be tested.
         * @param arrayIndex The index to the array element to be tested.
         * @return True if this array element should be included in a range.
         */
        private boolean partOfRange(char[] charArray, int arrayIndex) {
            if (charArray.length < 2) {
                return false;
            }
            // An element is in a range if it chains with its predecessor or its successor;
            // the first and last elements only have one neighbour to check.
            boolean chainsWithPrevious = arrayIndex > 0
                    && sameRangeEntryAsNext(charArray, arrayIndex - 1);
            if (chainsWithPrevious) {
                return true;
            }
            return arrayIndex < charArray.length - 1
                    && sameRangeEntryAsNext(charArray, arrayIndex);
        }

        /**
         * Determine whether two adjacent entries can be written in the same bfrange entry.
         * @param charArray The array to be tested.
         * @param firstItem The first of the two items in the array to be tested.
         * The second item is firstItem + 1.
         * @return True if 1) the next item in the array is sequential with
         * this one, 2) both character codes (array indices) share the same high byte, and
         * 3) stepping from the first Unicode value to the second does not roll over
         * its low byte.
         */
        private boolean sameRangeEntryAsNext(char[] charArray, int firstItem) {
            // Unicode values must be consecutive (int arithmetic, so 0xFFFF never wraps to 0).
            if (charArray[firstItem] + 1 != charArray[firstItem + 1]) {
                return false;
            }
            // Source codes of one range may differ only in their last byte.
            if (firstItem / 256 != (firstItem + 1) / 256) {
                return false;
            }
            // The last byte of a bfrange destination string must not be incremented past
            // 255 (the result is undefined per the PDF specification), so a run of Unicode
            // values may not continue across a 0x..FF -> 0x..00 boundary.
            if ((charArray[firstItem] & 0xFF) == 0xFF) {
                return false;
            }
            return true;
        }

        /**
         * Determine whether this array element should be the start of a bfrange
         * entry.
         * @param charArray The array to be tested.
         * @param arrayIndex The index to the array element to be tested.
         * @return True if this array element is the beginning of a range.
         */
        private boolean startOfRange(char[] charArray, int arrayIndex) {
            // Can't be the start of a range if not part of a range.
            if (!partOfRange(charArray, arrayIndex)) {
                return false;
            }
            // If first element in the array, must be start of a range
            if (arrayIndex == 0) {
                return true;
            }
            // If last element in the array, cannot be start of a range
            if (arrayIndex == charArray.length - 1) {
                return false;
            }
            // It starts a range only if it does not continue the previous element's range.
            return !sameRangeEntryAsNext(charArray, arrayIndex - 1);
        }

        /**
         * Prepends the input string with a sufficient number of "0" characters to
         * get the returned string to be numChars length.
         * @param input The input string.
         * @param numChars The minimum characters in the output string.
         * @return The padded string (the input itself if it is already long enough).
         */
        private String padHexString(String input, int numChars) {
            int length = input.length();
            if (length >= numChars) {
                return input;
            }
            StringBuffer returnString = new StringBuffer(numChars);
            for (int i = length; i < numChars; i++) {
                returnString.append('0');
            }
            returnString.append(input);
            return returnString.toString();
        }
    }
}