blob: 87ff6d60981ef2eda63267521e4a176cdc7771f9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.fonts.type1;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Scanner;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class PostscriptParser {
protected static final Log LOG = LogFactory.getLog(PostscriptParser.class);
/* Patterns used to identify Postscript elements */
private static final String DICTIONARY = "dict";
private static final String FIXED_ARRAY = "array";
private static final String VARIABLE_ARRAY = "[";
private static final String SUBROUTINE = "{";
/* A map of parsed subroutines, keyed by their operator, so that if they are encountered
* during the parsing phase of another element, they can be read and pattern matched. */
private HashMap<String, PSSubroutine> subroutines = new HashMap<String, PSSubroutine>();
/**
* Scans a segment of Postscript one byte at a time and collects the top level elements
* it recognises.
* <p>Bytes are grouped into tokens separated by break characters or by the special
* characters '{', '}', '[', ']' (and '/' when a token is already in progress). A token
* longer than one character that starts with '/', or the token "currentdict", becomes
* the current operator unless exactly one token has been collected since the previous
* operator. Each completed token is then passed with the current operator to
* {@link #createElement(String, String, int)}; when that returns an element, the element
* takes over the tokenizing of the following bytes until it reports that it has no
* more data.</p>
* @param segment The byte array containing the postscript data
* @return A list of found Postscript elements
* @throws IOException declared because {@link PSElement#parse(byte, int)} may throw an
* UnsupportedEncodingException
*/
public List<PSElement> parse(byte[] segment) throws IOException {
List<PSElement> parsedElements = new ArrayList<PSElement>();
/* Currently only scan and store the top level element. For deeper
* Postscript parsing you can push and pop elements from a stack */
PSElement foundElement = null;
// The most recent operator token (null until the first one is seen)
String operator = null;
// The token currently being assembled
StringBuilder token = new StringBuilder();
// Tokens completed since the current operator was set
List<String> tokens = new ArrayList<String>();
int startPoint = -1;
// True when the byte that ended the current token was a special character
boolean specialDelimiter = false;
// True on the iteration that re-reads a special character after the retrace below
boolean lastWasSpecial = false;
for (int i = 0; i < segment.length; i++) {
byte cur = segment[i];
if (foundElement != null && foundElement.hasMore()) {
// An element is still being read; it tokenizes the byte itself
foundElement.parse(cur, i);
continue;
} else {
char c = (char)cur;
if (!lastWasSpecial) {
// A '/' only acts as a delimiter when it interrupts a token already in progress
specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']'
|| (!token.toString().equals("") && c == '/'));
// NOTE(review): 15 is the SI control character; carriage return is 13 (octal 015) and
// is already covered by '\r', and tab (9) is not treated as a break - confirm intent.
boolean isNotBreak = !(c == ' ' || c == '\r' || cur == 15 || cur == 12
|| cur == 10);
if (isNotBreak && !specialDelimiter) {
// Ordinary character: keep building the token
token.append(c);
continue;
}
} else {
// Second visit of a special character: it starts a token of its own
lastWasSpecial = false;
token.append(c);
if (token.toString().equals("/")) {
// A slash begins a name, so keep accumulating the characters that follow
continue;
}
}
}
// Reaching this point means the token is complete and is evaluated below
try {
boolean setOp = false;
// && binds tighter than ||, so "currentdict" is accepted regardless of the other tests
if ((foundElement == null || !foundElement.hasMore()) && token.length() > 1
&& token.charAt(0) == '/' && tokens.size() != 1 || hasEndToken(token.toString())) {
operator = token.toString();
setOp = true;
// The tokens gathered for the previous operator ended with "def": record them as a
// variable using the first token as its operator and the second as its value
if (tokens.size() > 2 && tokens.get(tokens.size() - 1).equals("def")) {
PSVariable newVar = new PSVariable(tokens.get(0), startPoint);
newVar.setValue(tokens.get(1));
newVar.setEndPoint(i - operator.length());
parsedElements.add(newVar);
}
tokens.clear();
// i is on the byte that ended the token, so this is the offset where the token began
startPoint = i - token.length();
}
if (operator != null) {
if (foundElement instanceof PSSubroutine) {
// Finished subroutines are also registered by operator for later lookups
PSSubroutine sub = (PSSubroutine)foundElement;
subroutines.put(sub.getOperator(), sub);
parsedElements.add(sub);
if (!setOp) {
// An empty operator makes createElement return null for this token
operator = "";
}
} else {
if (foundElement != null) {
// Keep only the first element found for a given operator
if (!hasMatch(foundElement.getOperator(), parsedElements)) {
parsedElements.add(foundElement);
} else {
LOG.warn("Duplicate " + foundElement.getOperator()
+ " in font file, Ignoring.");
}
}
}
//Compare token against patterns and create an element if matched
foundElement = createElement(operator, token.toString(), startPoint);
}
} finally {
// Always remember the finished token and start a fresh one
tokens.add(token.toString());
token = new StringBuilder();
if (specialDelimiter) {
specialDelimiter = false;
lastWasSpecial = true;
//Retrace special postscript character so it can be processed separately
i--;
}
}
}
return parsedElements;
}
/**
 * Tells whether the given token is the one that is accepted as an operator even
 * though it does not start with a slash.
 * @param token The token to test
 * @return true if the token is "currentdict"
 */
private boolean hasEndToken(String token) {
    final boolean isEndToken = token.equals("currentdict");
    return isEndToken;
}
/**
 * Checks whether an element with the given operator is already present in the list.
 * @param operator The operator to look for
 * @param elements The elements to search
 * @return true if any element reports an operator equal to the given one
 */
private boolean hasMatch(String operator, List<PSElement> elements) {
    boolean matched = false;
    for (PSElement candidate : elements) {
        String candidateOperator = candidate.getOperator();
        if (candidateOperator.equals(operator)) {
            matched = true;
            break;
        }
    }
    return matched;
}
/**
 * Creates the element object matching the given token, if the token identifies one.
 * @param operator The operator the new element is created for; an empty operator
 * never produces an element
 * @param elementID The token compared against the known element patterns
 * @param startPoint The start location handed to the created element
 * @return The new element, or null when the token matches no pattern (a dictionary
 * is not created for the "/Private" operator)
 */
public PSElement createElement(String operator, String elementID, int startPoint) {
    if (operator.equals("")) {
        return null;
    }
    if (elementID.equals(FIXED_ARRAY)) {
        return new PSFixedArray(operator, startPoint);
    }
    if (elementID.equals(VARIABLE_ARRAY)) {
        return new PSVariableArray(operator, startPoint);
    }
    if (elementID.equals(SUBROUTINE)) {
        return new PSSubroutine(operator, startPoint);
    }
    boolean privateOperator = operator.equals("/Private");
    if (!privateOperator && elementID.equals(DICTIONARY)) {
        return new PSDictionary(operator, startPoint);
    }
    return null;
}
/**
 * Common base class of the Postscript elements produced by the parser. It splits the
 * incoming bytes into tokens and hands them to the concrete element.
 */
public abstract class PSElement {
    /* The identifying operator for this element */
    protected String operator;
    /* The bytes of the token that is currently being collected */
    private List<Byte> token = new ArrayList<Byte>();
    /* Determines whether there is any more data to be read whilst parsing */
    protected boolean hasMore = true;
    /* The locations of any entries containing binary data (e.g. arrays) */
    protected LinkedHashMap<String, int[]> binaryEntries = new LinkedHashMap<String, int[]>();
    /* The tokens parsed from the current element */
    protected List<String> tokens = new ArrayList<String>();
    /* Determines whether binary data is currently being read / parsed */
    protected boolean readBinary = false;
    /* The location of the element within the binary data */
    private int startPoint = -1;
    protected int endPoint = -1;
    /* A flag to determine if unexpected postscript has been found in the element */
    private boolean foundUnexpected = false;

    /**
     * Creates an element for the given operator.
     * @param operator The operator identifying the element
     * @param startPoint The start location of the element within the source data
     */
    public PSElement(String operator, int startPoint) {
        this.operator = operator;
        this.startPoint = startPoint;
    }

    /**
     * Gets the Postscript element operator
     * @return The operator returned as a string
     */
    public String getOperator() {
        return this.operator;
    }

    /**
     * The start location of the element within the source binary data
     * @return The start location returned as an integer
     */
    public int getStartPoint() {
        return this.startPoint;
    }

    /**
     * The end location of the element within the source binary data
     * @return The end location returned as an integer
     */
    public int getEndPoint() {
        return this.endPoint;
    }

    /**
     * Takes over the task of tokenizing the byte data. While binary data is being
     * read the byte is passed straight to {@link #parseByte(byte, int)}; otherwise it
     * is added to the current token, or ends that token when it is a break or one of
     * the special characters, which are also reported as tokens of their own.
     * @param cur The current byte being read
     * @param pos The position of the current byte
     * @throws UnsupportedEncodingException if the ASCII charset is not available
     */
    public void parse(byte cur, int pos) throws UnsupportedEncodingException {
        if (readBinary) {
            parseByte(cur, pos);
            return;
        }
        char c = (char)cur;
        boolean special;
        switch (c) {
        case '{':
        case '}':
        case '[':
        case ']':
        case '(':
        case ')':
            special = true;
            break;
        default:
            special = false;
            break;
        }
        boolean isBreak = c == ' ' || c == '\r' || cur == 10 || cur == 12 || cur == 15;
        if (special || isBreak) {
            // The pending token ends here
            parseToken(pos);
        } else {
            token.add(cur);
        }
        if (special) {
            // Report the special character as a token of its own
            token.add(cur);
            parseToken(pos);
        }
    }

    /* Decodes the collected bytes as ASCII, passes the text on and resets the buffer */
    private void parseToken(int pos) throws UnsupportedEncodingException {
        byte[] raw = new byte[token.size()];
        int next = 0;
        for (Byte collected : token) {
            raw[next++] = collected.byteValue();
        }
        String text = new String(raw, "ASCII");
        parseToken(text, pos);
        token.clear();
    }

    /**
     * Passes responsibility for processing the byte stream to the PostScript object
     * @param cur The byte currently being read
     * @param pos The position of the given byte
     */
    public abstract void parseByte(byte cur, int pos);

    /**
     * Delegates the parse routine to a sub class
     * @param token The token which to parse
     * @param curPos The position of the byte that completed the token
     */
    public abstract void parseToken(String token, int curPos);

    /**
     * Tests whether the given text can be parsed as an int.
     * @param intValue The text to test
     * @return true if Integer.parseInt accepts the text
     */
    protected boolean isInteger(String intValue) {
        boolean parsable = true;
        try {
            Integer.parseInt(intValue);
        } catch (NumberFormatException ex) {
            parsable = false;
        }
        return parsable;
    }

    /**
     * Gets the recorded binary entry locations in insertion order.
     * @return The map of binary entry locations
     */
    public LinkedHashMap<String, int[]> getBinaryEntries() {
        return this.binaryEntries;
    }

    /**
     * Gets the binary entry location of a given index from the array
     * @param index The index for which to retrieve the binary data location
     * @return The location stored at that position in insertion order, or an empty
     * array if there is no such entry
     */
    public int[] getBinaryEntryByIndex(int index) {
        int remaining = index;
        for (int[] location : binaryEntries.values()) {
            if (remaining == 0) {
                return location;
            }
            remaining--;
        }
        return new int[0];
    }

    /**
     * Determines if more data is still to be parsed for the Postscript element.
     * @return Returns true if more data exists
     */
    public boolean hasMore() {
        return this.hasMore;
    }

    /**
     * Sets a value to be true if unexpected postscript is found in the element.
     * An example is where the encoding table may have a series of postscript operators
     * altering the state of the array. In this case the only option will be to
     * fully embed the font to avoid incorrect encoding in the resulting subset.
     * @param foundUnexpected true if unexpected postscript is found.
     */
    protected void setFoundUnexpected(boolean foundUnexpected) {
        this.foundUnexpected = foundUnexpected;
    }

    /**
     * Returns whether unexpected postscript has been found in the element
     * @return true if unexpected postscript is found
     */
    public boolean getFoundUnexpected() {
        return this.foundUnexpected;
    }
}
/**
 * An object representing a Postscript array with a fixed number of entries
 */
public class PSFixedArray extends PSElement {
    /* Token that is left out of the stored entry text */
    private static final String READ_ONLY = "readonly";
    /* The text of the entry currently being assembled */
    private String entry = "";
    /* The characters collected by parseByte while binary data is being skipped */
    private String token = "";
    /* Set once the end of the array definition has been reached */
    private boolean finished = false;
    /* The number of binary bytes that are still to be skipped */
    protected int binaryLength = 0;
    /* A map containing each entry and its contents in the array, keyed by index */
    private HashMap<Integer, String> entries;

    public PSFixedArray(String operator, int startPoint) {
        super(operator, startPoint);
        entries = new HashMap<Integer, String>();
    }

    /**
     * Processes one token of the array body. The array ends at "def" or at a token
     * naming a known subroutine that contains "def"; every "dup" starts a new entry.
     * Once four tokens of the form "dup x n y" with an integer n have been collected,
     * the following n bytes are treated as binary data.
     * @param token The token to process
     * @param curPos The position of the byte that completed the token
     */
    @Override
    public void parseToken(String token, int curPos) {
        if (!checkForEnd(token) || token.equals("def")) {
            hasMore = false;
            endPoint = curPos;
            return;
        }
        if (token.equals("dup")) {
            // A new entry begins: store the previous one if it was a "dup" entry
            if (entry.startsWith("dup")) {
                addEntry(entry);
            }
            entry = "";
            tokens.clear();
        }
        if (!token.equals(READ_ONLY)) {
            entry += token + " ";
        }
        if (!token.trim().equals("")) {
            tokens.add(token);
        }
        if (tokens.size() == 4 && tokens.get(0).equals("dup") && isInteger(tokens.get(2))) {
            binaryLength = Integer.parseInt(tokens.get(2));
            readBinary = true;
        }
    }

    /* Returns false once the end of the array has been reached; the first time this
     * happens the entry collected so far is stored. */
    private boolean checkForEnd(String checkToken) {
        boolean subFound = false;
        //Check for a subroutine matching that of an array end definition
        PSSubroutine sub = subroutines.get("/" + checkToken);
        if (sub != null && sub.getSubroutine().contains("def")) {
            subFound = true;
        }
        if (!finished && (subFound || checkToken.equals("def"))) {
            finished = true;
            addEntry(entry);
            return false;
        } else {
            return !finished;
        }
    }

    /**
     * Gets a map of array entries identified by index
     * @return Returns the map of array entries
     */
    public HashMap<Integer, String> getEntries() {
        return entries;
    }

    /* Normalises the spacing of an entry and stores it under the integer that follows
     * its first token. Entries that do not have that shape mark the element as
     * containing unexpected postscript; blank entries are ignored. */
    private void addEntry(String entry) {
        if (entry.trim().length() == 0) {
            // Nothing but break characters were collected, so there is nothing to store
            return;
        }
        int firstSlash = entry.indexOf('/');
        // Only look at the preceding character when there is one
        if (firstSlash > 0 && entry.charAt(firstSlash - 1) != ' ') {
            entry = entry.replace("/", " /");
        }
        // Collapse runs of spaces so that the scanner below never sees empty tokens
        int entryLen;
        do {
            entryLen = entry.length();
            entry = entry.replace("  ", " ");
        } while (entry.length() != entryLen);
        Scanner s = new Scanner(entry).useDelimiter(" ");
        boolean valid = false;
        if (s.hasNext()) {
            // Skip the leading token (normally "dup"); the index is expected next
            s.next();
            if (s.hasNextInt()) {
                entries.put(s.nextInt(), entry);
                valid = true;
            }
        }
        if (!valid) {
            setFoundUnexpected(true);
        }
    }

    /**
     * Consumes the bytes of a binary entry. While bytes remain they are appended to
     * the internal token and counted down; the first byte after them records the
     * location of the binary data under the second token of the entry and switches
     * back to token parsing.
     * @param cur The byte currently being read
     * @param pos The position of the given byte
     */
    @Override
    public void parseByte(byte cur, int pos) {
        if (binaryLength > 0) {
            token += (char)cur;
            binaryLength--;
        } else {
            if (readBinary) {
                int bLength = Integer.parseInt(tokens.get(2));
                int start = pos - bLength;
                int end = start + bLength;
                binaryEntries.put(tokens.get(1), new int[] {start, end});
                token = "";
                readBinary = false;
            } else {
                tokens.add(token);
                parseToken(token, pos);
                token = "";
            }
        }
    }
}
/**
 * A Postscript array whose number of entries is not declared up front
 */
public class PSVariableArray extends PSElement {
    /* Nesting depth of the subroutine braces seen so far */
    private int level = 0;
    /* The subroutine items collected from the array */
    private List<String> arrayItems = new ArrayList<String>();
    /* The text of the item currently being assembled */
    private String entry = "";

    public PSVariableArray(String operator, int startPoint) {
        super(operator, startPoint);
    }

    /**
     * Processes one token of the array body. A token starting with ']' outside of
     * any braces ends the array; the text collected up to a closing brace that
     * returns to the outermost level is stored as one array item.
     * @param token The token to process
     * @param curPos The position of the byte that completed the token
     */
    @Override
    public void parseToken(String token, int curPos) {
        entry = entry + token + " ";
        if (level <= 0 && token.startsWith("]")) {
            endPoint = curPos;
            hasMore = false;
            return;
        }
        /* Braces are counted so that the end of a subroutine item can be told apart
         * from the end of a nested one */
        if (token.equals("{")) {
            level++;
            return;
        }
        if (!token.equals("}")) {
            return;
        }
        level--;
        if (level == 0 && !entry.equals("")) {
            arrayItems.add(entry);
            entry = "";
        }
    }

    /**
     * Gets a list of found array entries within the variable array
     * @return Returns the found array elements as a list
     */
    public List<String> getEntries() {
        return this.arrayItems;
    }

    @Override
    public void parseByte(byte cur, int pos) {
        //Not currently used
    }
}
/**
 * A Postscript subroutine, i.e. the tokens enclosed in a pair of braces
 */
public class PSSubroutine extends PSElement {
    /* Number of braces that are currently open; starts at one */
    private int level = 1;
    /* The text of the subroutine collected so far */
    private String entry = "";

    public PSSubroutine(String operator, int startPoint) {
        super(operator, startPoint);
    }

    /**
     * Processes one token of the subroutine. Once all braces are closed, the tokens
     * "def", "ifelse" or one starting with '}' end the subroutine; every other token
     * is appended to the subroutine text.
     * @param token The token to process
     * @param curPos The position of the byte that completed the token
     */
    @Override
    public void parseToken(String token, int curPos) {
        boolean bracesClosed = level == 0;
        if (bracesClosed
                && (token.equals("def") || token.equals("ifelse") || token.startsWith("}"))) {
            endPoint = curPos;
            hasMore = false;
            return;
        }
        if (token.equals("{")) {
            level += 1;
        } else if (token.equals("}")) {
            level -= 1;
        }
        entry = entry + token + " ";
    }

    /**
     * Gets the parsed subroutine text with surrounding whitespace removed
     * @return The subroutine as a string
     */
    public String getSubroutine() {
        String trimmed = entry.trim();
        return trimmed;
    }

    @Override
    public void parseByte(byte cur, int pos) {
        //Not currently used
    }
}
/**
 * An object representing a Postscript dictionary
 */
public class PSDictionary extends PSElement {
    /* A list of dictionary entries which they themselves could be variables,
     * subroutines and arrays, This is currently left as parsed Strings as there is
     * no need to delve deeper for our current purposes. */
    private HashMap<String, String> entries;
    /* The text of the entry currently being assembled */
    private String entry = "";
    /* Token handed on by parseByte when no binary data is being read */
    private String token = "";
    /* The number of binary bytes that are still to be skipped */
    protected int binaryLength = 0;

    public PSDictionary(String operator, int startPoint) {
        super(operator, startPoint);
        entries = new HashMap<String, String>();
    }

    /**
     * Processes one token of the dictionary body. The token "end" closes the
     * dictionary; a token starting with '/' begins a new entry and stores the
     * previous one. When an entry starts with a name followed by an integer and a
     * third token other than "def", that many following bytes are treated as binary
     * data.
     * @param token The token to process
     * @param curPos The position of the byte that completed the token
     */
    @Override
    public void parseToken(String token, int curPos) {
        if (token.equals("end")) {
            addEntry(entry);
            hasMore = false;
            endPoint = curPos;
            return;
        }
        if (token.startsWith("/")) {
            if (entry.trim().startsWith("/")) {
                tokens.clear();
                addEntry(entry);
            }
            entry = "";
        }
        // Tokens are only collected once the first name of an entry has been seen
        if (tokens.size() >= 1 || token.startsWith("/")) {
            tokens.add(token);
        }
        entry += token + " ";
        if (tokens.size() == 3 && tokens.get(0).startsWith("/") && !tokens.get(2).equals("def")
                && isInteger(tokens.get(1))) {
            binaryLength = Integer.parseInt(tokens.get(1));
            readBinary = true;
        }
    }

    /**
     * Gets a map of dictionary entries identified by their name
     * @return Returns the dictionary entries as a map
     */
    public HashMap<String, String> getEntries() {
        return entries;
    }

    /* Stores the entry text under its first space separated token. An entry without
     * any token (for example when "end" directly follows the start of the dictionary)
     * is ignored instead of failing in the scanner. */
    private void addEntry(String entry) {
        Scanner s = new Scanner(entry).useDelimiter(" ");
        if (!s.hasNext()) {
            return;
        }
        String id = s.next();
        entries.put(id, entry);
    }

    /**
     * Consumes the bytes of a binary entry. Bytes are counted down while any remain;
     * the first byte after them records the location of the binary data under the
     * name of the entry and switches back to token parsing.
     * @param cur The byte currently being read
     * @param pos The position of the given byte
     */
    @Override
    public void parseByte(byte cur, int pos) {
        if (binaryLength > 0) {
            binaryLength--;
        } else {
            if (readBinary) {
                int start = pos - Integer.parseInt(tokens.get(1));
                int end = pos;
                binaryEntries.put(tokens.get(0), new int[] {start, end});
                readBinary = false;
            } else {
                tokens.add(token);
                parseToken(token, pos);
            }
        }
    }
}
/**
 * A Postscript variable, i.e. an operator together with a single value
 */
public class PSVariable extends PSElement {
    /* The value of the parsed Postscript variable. */
    private String value = "";

    public PSVariable(String operator, int startPoint) {
        super(operator, startPoint);
    }

    /**
     * Ends the variable when the token "def" is seen; all other tokens are ignored.
     * @param token The token to process
     * @param curPos The position of the byte that completed the token
     */
    @Override
    public void parseToken(String token, int curPos) {
        boolean endsDefinition = token.equals("def");
        if (endsDefinition) {
            endPoint = curPos;
            hasMore = false;
        }
    }

    @Override
    public void parseByte(byte cur, int pos) {
        //Not currently used
    }

    /**
     * Gets the value of the Postscript variable
     * @return Returns the value as a String
     */
    public String getValue() {
        return this.value;
    }

    /**
     * Sets the value of the Postscript variable value
     * @param value The value to set
     */
    public void setValue(String value) {
        this.value = value;
    }

    /**
     * Sets the end point location of the current Postscript variable.
     * @param endPoint The end point location as an integer
     */
    public void setEndPoint(int endPoint) {
        this.endPoint = endPoint;
    }
}
}