| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.fonts.type1; |
| |
| import java.io.IOException; |
| import java.io.UnsupportedEncodingException; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map.Entry; |
| import java.util.Scanner; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| |
| public class PostscriptParser { |
| |
| protected static final Log LOG = LogFactory.getLog(PostscriptParser.class); |
| /* Patterns used to identify Postscript elements */ |
| private static final String DICTIONARY = "dict"; |
| private static final String FIXED_ARRAY = "array"; |
| private static final String VARIABLE_ARRAY = "["; |
| private static final String SUBROUTINE = "{"; |
| /* A list of parsed subroutines so if they are encountered during the parsing |
| * phase of another element, they can be read and pattern matched. */ |
| private HashMap<String, PSSubroutine> subroutines = new HashMap<String, PSSubroutine>(); |
| |
| /** |
| * Parses the postscript document and returns a list of elements |
| * @param segment The byte array containing the postscript data |
| * @return A list of found Postscript elements |
| * @throws IOException |
| */ |
| public List<PSElement> parse(byte[] segment) throws IOException { |
| List<PSElement> parsedElements = new ArrayList<PSElement>(); |
| /* Currently only scan and store the top level element. For deeper |
| * Postscript parsing you can push and pop elements from a stack */ |
| PSElement foundElement = null; |
| String operator = null; |
| StringBuilder token = new StringBuilder(); |
| List<String> tokens = new ArrayList<String>(); |
| int startPoint = -1; |
| boolean specialDelimiter = false; |
| boolean lastWasSpecial = false; |
| for (int i = 0; i < segment.length; i++) { |
| byte cur = segment[i]; |
| if (foundElement != null && foundElement.hasMore()) { |
| foundElement.parse(cur, i); |
| continue; |
| } else { |
| char c = (char)cur; |
| if (!lastWasSpecial) { |
| specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']' |
| || (!token.toString().equals("") && c == '/')); |
| boolean isNotBreak = !(c == ' ' || c == '\r' || cur == 15 || cur == 12 |
| || cur == 10); |
| if (isNotBreak && !specialDelimiter) { |
| token.append(c); |
| continue; |
| } |
| } else { |
| lastWasSpecial = false; |
| token.append(c); |
| if (token.toString().equals("/")) { |
| continue; |
| } |
| } |
| } |
| try { |
| boolean setOp = false; |
| if ((foundElement == null || !foundElement.hasMore()) && token.length() > 1 |
| && token.charAt(0) == '/' && tokens.size() != 1 || hasEndToken(token.toString())) { |
| operator = token.toString(); |
| setOp = true; |
| if (tokens.size() > 2 && tokens.get(tokens.size() - 1).equals("def")) { |
| PSVariable newVar = new PSVariable(tokens.get(0), startPoint); |
| newVar.setValue(tokens.get(1)); |
| newVar.setEndPoint(i - operator.length()); |
| parsedElements.add(newVar); |
| } |
| tokens.clear(); |
| startPoint = i - token.length(); |
| } |
| if (operator != null) { |
| if (foundElement instanceof PSSubroutine) { |
| PSSubroutine sub = (PSSubroutine)foundElement; |
| subroutines.put(sub.getOperator(), sub); |
| parsedElements.add(sub); |
| if (!setOp) { |
| operator = ""; |
| } |
| } else { |
| if (foundElement != null) { |
| if (!hasMatch(foundElement.getOperator(), parsedElements)) { |
| parsedElements.add(foundElement); |
| } else { |
| LOG.warn("Duplicate " + foundElement.getOperator() |
| + " in font file, Ignoring."); |
| } |
| } |
| } |
| //Compare token against patterns and create an element if matched |
| foundElement = createElement(operator, token.toString(), startPoint); |
| } |
| } finally { |
| tokens.add(token.toString()); |
| token = new StringBuilder(); |
| if (specialDelimiter) { |
| specialDelimiter = false; |
| lastWasSpecial = true; |
| //Retrace special postscript character so it can be processed separately |
| i--; |
| } |
| } |
| } |
| return parsedElements; |
| } |
| |
| private boolean hasEndToken(String token) { |
| return token.equals("currentdict"); |
| } |
| |
| private boolean hasMatch(String operator, List<PSElement> elements) { |
| for (PSElement element : elements) { |
| if (element.getOperator().equals(operator)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| public PSElement createElement(String operator, String elementID, int startPoint) { |
| if (operator.equals("")) { |
| return null; |
| } |
| if (elementID.equals(FIXED_ARRAY)) { |
| return new PSFixedArray(operator, startPoint); |
| } else if (elementID.equals(VARIABLE_ARRAY)) { |
| return new PSVariableArray(operator, startPoint); |
| } else if (elementID.equals(SUBROUTINE)) { |
| return new PSSubroutine(operator, startPoint); |
| } else if (!operator.equals("/Private") && elementID.equals(DICTIONARY)) { |
| return new PSDictionary(operator, startPoint); |
| } |
| return null; |
| } |
| |
/**
 * A base Postscript element class. It splits the incoming bytes into tokens and
 * hands each token (or, while binary data is being read, each raw byte) to the
 * sub class.
 */
public abstract class PSElement {
    /* The identifying operator for this element */
    protected String operator;
    /* The bytes of the token currently being collected */
    private final List<Byte> token;
    /* Determines whether there is any more data to be read whilst parsing */
    protected boolean hasMore = true;
    /* The locations of any entries containing binary data (e.g. arrays) */
    protected LinkedHashMap<String, int[]> binaryEntries;
    /* The tokens parsed from the current element */
    protected List<String> tokens;
    /* Determines whether binary data is currently being read / parsed */
    protected boolean readBinary = false;
    /* The location of the element within the binary data */
    private final int startPoint;
    protected int endPoint = -1;
    /* A flag to determine if unexpected postscript has been found in the element */
    private boolean foundUnexpected = false;

    /**
     * Creates a new element.
     * @param operator The identifying operator for this element
     * @param startPoint The start location of the element within the source data
     */
    public PSElement(String operator, int startPoint) {
        this.operator = operator;
        this.startPoint = startPoint;
        token = new ArrayList<Byte>();
        binaryEntries = new LinkedHashMap<String, int[]>();
        tokens = new ArrayList<String>();
    }

    /**
     * Gets the Postscript element operator
     * @return The operator returned as a string
     */
    public String getOperator() {
        return operator;
    }

    /**
     * The start location of the element within the source binary data
     * @return The start location returned as an integer
     */
    public int getStartPoint() {
        return startPoint;
    }

    /**
     * The end location of the element within the source binary data
     * @return The end location returned as an integer, -1 until it has been set
     */
    public int getEndPoint() {
        return endPoint;
    }

    /**
     * Takes over the task of tokenizing the byte data. While binary data is being
     * read the byte is passed straight to {@link #parseByte(byte, int)}.
     * @param cur The current byte being read
     * @param pos The position of the given byte
     * @throws UnsupportedEncodingException if the token bytes cannot be decoded
     */
    public void parse(byte cur, int pos) throws UnsupportedEncodingException {
        if (readBinary) {
            parseByte(cur, pos);
            return;
        }
        char c = (char) cur;
        boolean specialDelimiter = c == '{' || c == '}' || c == '[' || c == ']'
                || c == '(' || c == ')';
        boolean isBreak = c == ' ' || cur == 15 || cur == 12 || c == '\r' || c == 10;
        if (isBreak || specialDelimiter) {
            // Flush whatever was collected so far; this may be an empty token
            parseToken(pos);
        } else {
            token.add(cur);
        }
        if (specialDelimiter) {
            // The delimiter itself is reported as a token of its own
            token.add(cur);
            parseToken(pos);
        }
    }

    /**
     * Decodes the collected bytes as ASCII, passes the resulting token to the sub
     * class and then empties the buffer.
     */
    private void parseToken(int pos) throws UnsupportedEncodingException {
        byte[] bytesToken = new byte[token.size()];
        for (int i = 0; i < bytesToken.length; i++) {
            bytesToken[i] = token.get(i).byteValue();
        }
        parseToken(new String(bytesToken, "ASCII"), pos);
        token.clear();
    }

    /**
     * Passes responsibility for processing the byte stream to the PostScript object
     * @param cur The byte currently being read
     * @param pos The position of the given byte
     */
    public abstract void parseByte(byte cur, int pos);

    /**
     * Delegates the parse routine to a sub class
     * @param token The token which to parse, may be empty
     * @param curPos The position of the byte that completed the token
     */
    public abstract void parseToken(String token, int curPos);

    /**
     * Tests whether a string can be read as an int.
     * @param intValue The string to test
     * @return true if {@link Integer#parseInt(String)} accepts the string
     */
    protected boolean isInteger(String intValue) {
        try {
            Integer.parseInt(intValue);
            return true;
        } catch (NumberFormatException ex) {
            return false;
        }
    }

    /**
     * Gets the locations of the binary entries in the order they were added.
     * @return The map of binary entry locations
     */
    public LinkedHashMap<String, int[]> getBinaryEntries() {
        return binaryEntries;
    }

    /**
     * Gets the binary entry location of a given index from the array
     * @param index The index for which to retrieve the binary data location
     * @return The location stored at that position in insertion order, or an empty
     * array if there is no such position
     */
    public int[] getBinaryEntryByIndex(int index) {
        int count = 0;
        for (Entry<String, int[]> entry : binaryEntries.entrySet()) {
            if (count == index) {
                return entry.getValue();
            }
            count++;
        }
        return new int[0];
    }

    /**
     * Determines if more data is still to be parsed for the Postscript element.
     * @return Returns true if more data exists
     */
    public boolean hasMore() {
        return hasMore;
    }

    /**
     * Sets a value to be true if an unexpected postscript entry is found in the element.
     * An example is where the encoding table may have a series of postscript operators
     * altering the state of the array. In this case the only option will be to
     * fully embed the font to avoid incorrect encoding in the resulting subset.
     * @param foundUnexpected true if unexpected postscript is found.
     */
    protected void setFoundUnexpected(boolean foundUnexpected) {
        this.foundUnexpected = foundUnexpected;
    }

    /**
     * Returns whether unexpected postscript has been found in the element
     * @return true if unexpected postscript is found
     */
    public boolean getFoundUnexpected() {
        return this.foundUnexpected;
    }
}
| |
| /** |
| * An object representing a Postscript array with a fixed number of entries |
| */ |
| public class PSFixedArray extends PSElement { |
| |
| private String entry = ""; |
| private String token = ""; |
| private boolean finished = false; |
| protected int binaryLength = 0; |
| /* A list containing each entry and it's contents in the array */ |
| private HashMap<Integer, String> entries; |
| private static final String READ_ONLY = "readonly"; |
| |
| public PSFixedArray(String operator, int startPoint) { |
| super(operator, startPoint); |
| entries = new HashMap<Integer, String>(); |
| } |
| |
| @Override |
| public void parseToken(String token, int curPos) { |
| if (!checkForEnd(token) || token.equals("def")) { |
| hasMore = false; |
| endPoint = curPos; |
| return; |
| } |
| if (token.equals("dup")) { |
| if (entry.startsWith("dup")) { |
| addEntry(entry); |
| } |
| entry = ""; |
| tokens.clear(); |
| } |
| if (!token.equals(READ_ONLY)) { |
| entry += token + " "; |
| } |
| if (!token.trim().equals("")) { |
| tokens.add(token); |
| } |
| if (tokens.size() == 4 && tokens.get(0).equals("dup") && isInteger(tokens.get(2))) { |
| binaryLength = Integer.parseInt(tokens.get(2)); |
| readBinary = true; |
| } |
| } |
| |
| private boolean checkForEnd(String checkToken) { |
| boolean subFound = false; |
| //Check for a subroutine matching that of an array end definition |
| PSSubroutine sub = subroutines.get("/" + checkToken); |
| if (sub != null && sub.getSubroutine().contains("def")) { |
| subFound = true; |
| } |
| if (!finished && (subFound || checkToken.equals("def"))) { |
| finished = true; |
| addEntry(entry); |
| return false; |
| } else { |
| return !finished; |
| } |
| } |
| |
| /** |
| * Gets a map of array entries identified by index |
| * @return Returns the map of array entries |
| */ |
| public HashMap<Integer, String> getEntries() { |
| return entries; |
| } |
| |
| private void addEntry(String entry) { |
| if (!entry.equals("")) { |
| if (entry.indexOf('/') != -1 && entry.charAt(entry.indexOf('/') - 1) != ' ') { |
| entry = entry.replace("/", " /"); |
| } |
| int entryLen; |
| do { |
| entryLen = entry.length(); |
| entry = entry.replace(" ", " "); |
| } while (entry.length() != entryLen); |
| Scanner s = new Scanner(entry).useDelimiter(" "); |
| boolean valid = false; |
| do { |
| s.next(); |
| if (!s.hasNext()) { |
| break; |
| } |
| int id = s.nextInt(); |
| entries.put(id, entry); |
| valid = true; |
| } while (false); |
| if (!valid) { |
| setFoundUnexpected(true); |
| } |
| } |
| } |
| |
| @Override |
| public void parseByte(byte cur, int pos) { |
| if (binaryLength > 0) { |
| token += (char)cur; |
| binaryLength--; |
| } else { |
| if (readBinary) { |
| int bLength = Integer.parseInt(tokens.get(2)); |
| int start = pos - bLength; |
| int end = start + bLength; |
| binaryEntries.put(tokens.get(1), new int[] {start, end}); |
| token = ""; |
| readBinary = false; |
| } else { |
| tokens.add(token); |
| parseToken(token, pos); |
| token = ""; |
| } |
| } |
| } |
| } |
| |
| /** |
| * An object representing a Postscript array with a variable number of entries |
| */ |
| public class PSVariableArray extends PSElement { |
| private int level = 0; |
| private List<String> arrayItems; |
| private String entry = ""; |
| |
| public PSVariableArray(String operator, int startPoint) { |
| super(operator, startPoint); |
| arrayItems = new ArrayList<String>(); |
| } |
| |
| @Override |
| public void parseToken(String token, int curPos) { |
| entry += token + " "; |
| if (level <= 0 && token.length() > 0 && token.charAt(0) == ']') { |
| hasMore = false; |
| endPoint = curPos; |
| return; |
| } |
| /* If the array item is a subroutine, the following keeps track of the current level |
| * of the tokens being parsed so that it can identify the finish */ |
| if (token.equals("{")) { |
| level++; |
| } else if (token.equals("}")) { |
| level--; |
| if (!entry.equals("") && level == 0) { |
| arrayItems.add(entry); |
| entry = ""; |
| } |
| } |
| } |
| |
| /** |
| * Gets a list of found array entries within the variable array |
| * @return Returns the found array elements as a list |
| */ |
| public List<String> getEntries() { |
| return arrayItems; |
| } |
| |
| @Override |
| public void parseByte(byte cur, int pos) { |
| //Not currently used |
| } |
| } |
| |
| /** |
| * An object representing a Postscript subroutine element |
| */ |
| public class PSSubroutine extends PSElement { |
| private int level = 1; |
| private String entry = ""; |
| |
| public PSSubroutine(String operator, int startPoint) { |
| super(operator, startPoint); |
| } |
| |
| @Override |
| public void parseToken(String token, int curPos) { |
| if (level == 0 && token.length() > 0 && (token.equals("def") || token.equals("ifelse") |
| || token.charAt(0) == '}')) { |
| hasMore = false; |
| endPoint = curPos; |
| return; |
| } |
| if (token.equals("{")) { |
| level++; |
| } else if (token.equals("}")) { |
| level--; |
| } |
| entry += token + " "; |
| } |
| |
| /** |
| * Gets the parsed subroutine element as unmodified string |
| * @return The subroutine as a string |
| */ |
| public String getSubroutine() { |
| return entry.trim(); |
| } |
| |
| @Override |
| public void parseByte(byte cur, int pos) { |
| //Not currently used |
| } |
| } |
| |
| /** |
| * An object representing a Postscript dictionary |
| */ |
| public class PSDictionary extends PSElement { |
| /* A list of dictionary entries which they themselves could be variables, |
| * subroutines and arrays, This is currently left as parsed Strings as there is |
| * no need to delve deeper for our current purposes. */ |
| private HashMap<String, String> entries; |
| private String entry = ""; |
| private String token = ""; |
| protected int binaryLength = 0; |
| |
| public PSDictionary(String operator, int startPoint) { |
| super(operator, startPoint); |
| entries = new HashMap<String, String>(); |
| } |
| |
| @Override |
| public void parseToken(String token, int curPos) { |
| if (token.equals("end")) { |
| addEntry(entry); |
| hasMore = false; |
| endPoint = curPos; |
| return; |
| } |
| if (token.startsWith("/")) { |
| if (entry.trim().startsWith("/")) { |
| tokens.clear(); |
| addEntry(entry); |
| } |
| entry = ""; |
| } |
| if (tokens.size() >= 1 || token.startsWith("/")) { |
| tokens.add(token); |
| } |
| entry += token + " "; |
| if (tokens.size() == 3 && tokens.get(0).startsWith("/") && !tokens.get(2).equals("def") |
| && isInteger(tokens.get(1))) { |
| binaryLength = Integer.parseInt(tokens.get(1)); |
| readBinary = true; |
| } |
| } |
| |
| /** |
| * Gets a map of dictionary entries identified by their name |
| * @return Returns the dictionary entries as a map |
| */ |
| public HashMap<String, String> getEntries() { |
| return entries; |
| } |
| |
| private void addEntry(String entry) { |
| Scanner s = new Scanner(entry).useDelimiter(" "); |
| String id = s.next(); |
| entries.put(id, entry); |
| } |
| |
| @Override |
| public void parseByte(byte cur, int pos) { |
| if (binaryLength > 0) { |
| binaryLength--; |
| } else { |
| if (readBinary) { |
| int start = pos - Integer.parseInt(tokens.get(1)); |
| int end = pos; |
| binaryEntries.put(tokens.get(0), new int[] {start, end}); |
| readBinary = false; |
| } else { |
| tokens.add(token); |
| parseToken(token, pos); |
| } |
| } |
| } |
| } |
| |
| /** |
| * An object representing a Postscript variable |
| */ |
| public class PSVariable extends PSElement { |
| |
| /* The value of the parsed Postscript variable. */ |
| private String value = ""; |
| |
| public PSVariable(String operator, int startPoint) { |
| super(operator, startPoint); |
| } |
| |
| @Override |
| public void parseToken(String token, int curPos) { |
| if (token.equals("def")) { |
| hasMore = false; |
| endPoint = curPos; |
| return; |
| } |
| } |
| |
| @Override |
| public void parseByte(byte cur, int pos) { |
| //Not currently used |
| } |
| |
| /** |
| * Sets the value of the Postscript variable value |
| * @param value The value to set |
| */ |
| public void setValue(String value) { |
| this.value = value; |
| } |
| |
| /** |
| * Gets the value of the Postscript variable |
| * @return Returns the value as a String |
| */ |
| public String getValue() { |
| return value; |
| } |
| |
| /** |
| * Sets the end point location of the current Postscript variable. |
| * @param endPoint The end point location as an integer |
| */ |
| public void setEndPoint(int endPoint) { |
| this.endPoint = endPoint; |
| } |
| |
| } |
| } |