| /* ==================================================================== |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==================================================================== */ |
| |
| package org.apache.poi.hslf.dev; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.PrintStream; |
| import java.util.Locale; |
| |
| import org.apache.poi.ddf.DefaultEscherRecordFactory; |
| import org.apache.poi.ddf.EscherContainerRecord; |
| import org.apache.poi.ddf.EscherRecord; |
| import org.apache.poi.ddf.EscherTextboxRecord; |
| import org.apache.poi.hslf.record.HSLFEscherRecordFactory; |
| import org.apache.poi.hslf.record.RecordTypes; |
| import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; |
| import org.apache.poi.util.HexDump; |
| import org.apache.poi.util.IOUtils; |
| import org.apache.poi.util.LittleEndian; |
| |
| /** |
| * This class provides a way to "peek" inside a powerpoint file. It |
| * will print out all the types it find, and for those it know aren't |
| * atoms, what they contain |
| * |
| * To figure out what things are, and if they are atoms or not, used the |
| * list in hslf.record.RecordTypes |
| * |
| * To peek inside PPDrawings, which hold Escher drawings, we use the |
| * DDF package from POI (but we can fake it by using the Escher listings |
| * from hslf.record.RecordTypes also) |
| */ |
| public final class SlideShowDumper { |
| private byte[] docstream; |
| |
| /** Do we try to use DDF to understand the escher objects? */ |
| private boolean ddfEscher = false; |
| /** Do we use our own built-in basic escher groker to understand the escher objects? */ |
| private boolean basicEscher = false; |
| |
| private PrintStream out; |
| |
| /** |
| * right now this function takes one parameter: a ppt file, and outputs |
| * a dump of what it contains |
| */ |
| public static void main(String args[]) throws IOException |
| { |
| if(args.length == 0) { |
| System.err.println("Useage: SlideShowDumper [-escher|-basicescher] <filename>"); |
| return; |
| } |
| |
| String filename = args[0]; |
| if(args.length > 1) { |
| filename = args[1]; |
| } |
| |
| NPOIFSFileSystem poifs = new NPOIFSFileSystem(new File(filename)); |
| SlideShowDumper foo = new SlideShowDumper(poifs, System.out); |
| poifs.close(); |
| |
| if(args.length > 1) { |
| if(args[0].equalsIgnoreCase("-escher")) { |
| foo.setDDFEscher(true); |
| } else { |
| foo.setBasicEscher(true); |
| } |
| } |
| |
| foo.printDump(); |
| } |
| |
| /** |
| * Constructs a Powerpoint dump from a POIFS Filesystem. Parses the |
| * document and dumps out the contents |
| * |
| * @param filesystem the POIFS FileSystem to read from |
| * @throws IOException if there is a problem while parsing the document. |
| */ |
| public SlideShowDumper(NPOIFSFileSystem filesystem, PrintStream out) throws IOException { |
| // Grab the document stream |
| InputStream is = filesystem.createDocumentInputStream("PowerPoint Document"); |
| docstream = IOUtils.toByteArray(is); |
| is.close(); |
| this.out = out; |
| } |
| |
| /** |
| * Control dumping of any Escher records found - should DDF be used? |
| */ |
| public void setDDFEscher(boolean grok) { |
| ddfEscher = grok; |
| basicEscher = !(grok); |
| } |
| |
| /** |
| * Control dumping of any Escher records found - should our built in |
| * basic groker be used? |
| */ |
| public void setBasicEscher(boolean grok) { |
| basicEscher = grok; |
| ddfEscher = !(grok); |
| } |
| |
| public void printDump() throws IOException { |
| // The format of records in a powerpoint file are: |
| // <little endian 2 byte "info"> |
| // <little endian 2 byte "type"> |
| // <little endian 4 byte "length"> |
| // If it has a zero length, following it will be another record |
| // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz> |
| // If it has a length, depending on its type it may have children or data |
| // If it has children, these will follow straight away |
| // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>> |
| // If it has data, this will come straigh after, and run for the length |
| // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd> |
| // All lengths given exclude the 8 byte record header |
| // (Data records are known as Atoms) |
| |
| // Document should start with: |
| // 0F 00 E8 03 ## ## ## ## |
| // (type 1000 = document, info 00 0f is normal, rest is document length) |
| // 01 00 E9 03 28 00 00 00 |
| // (type 1001 = document atom, info 00 01 normal, 28 bytes long) |
| |
| // When parsing a document, look to see if you know about that type |
| // of the current record. If you know it's a type that has children, |
| // process the record's data area looking for more records |
| // If you know about the type and it doesn't have children, either do |
| // something with the data (eg TextRun) or skip over it |
| // Otherwise, check the first byte. If you do a BINARY_AND on it with |
| // 0x0f (15) and get back 0x0f, you know it has children. Otherwise |
| // it doesn't |
| |
| walkTree(0,0,docstream.length); |
| } |
| |
| public void walkTree(int depth, int startPos, int maxLen) throws IOException { |
| int pos = startPos; |
| int endPos = startPos + maxLen; |
| final String ind = (depth == 0) ? "%1$s" : "%1$"+depth+"s"; |
| while(pos <= endPos - 8) { |
| long type = LittleEndian.getUShort(docstream,pos+2); |
| long len = LittleEndian.getUInt(docstream,pos+4); |
| byte opt = docstream[pos]; |
| |
| String fmt = ind+"At position %2$d (%2$04x): type is %3$d (%3$04x), len is %4$d (%4$04x)"; |
| out.println(String.format(Locale.ROOT, fmt, "", pos, type, len)); |
| |
| // See if we know about the type of it |
| String recordName = RecordTypes.forTypeID((short)type).name(); |
| |
| // Jump over header, and think about going on more |
| pos += 8; |
| out.println(String.format(Locale.ROOT, ind+"That's a %2$s", "", recordName)); |
| |
| // Now check if it's a container or not |
| int container = opt & 0x0f; |
| |
| // BinaryTagData seems to contain records, but it |
| // isn't tagged as doing so. Try stepping in anyway |
| if(type == 5003L && opt == 0L) { |
| container = 0x0f; |
| } |
| |
| out.println(); |
| if (type != 0L && container == 0x0f) { |
| if (type == 1035l || type == 1036l) { |
| // Special Handling of 1035=PPDrawingGroup and 1036=PPDrawing |
| if(ddfEscher) { |
| // Seems to be: |
| walkEscherDDF((depth+3),pos+8,(int)len-8); |
| } else if(basicEscher) { |
| walkEscherBasic((depth+3),pos+8,(int)len-8); |
| } |
| } else { |
| // General container record handling code |
| walkTree((depth+2),pos,(int)len); |
| } |
| } |
| |
| pos += (int)len; |
| } |
| } |
| |
| /** |
| * Use the DDF code to walk the Escher records |
| */ |
| public void walkEscherDDF(int indent, int pos, int len) { |
| if(len < 8) { return; } |
| |
| final String ind = (indent == 0) ? "%1$s" : "%1$"+indent+"s"; |
| |
| byte[] contents = new byte[len]; |
| System.arraycopy(docstream,pos,contents,0,len); |
| DefaultEscherRecordFactory erf = new HSLFEscherRecordFactory(); |
| EscherRecord record = erf.createRecord(contents,0); |
| |
| // For now, try filling in the fields |
| record.fillFields(contents,0,erf); |
| |
| long atomType = LittleEndian.getUShort(contents,2); |
| // This lacks the 8 byte header size |
| long atomLen = LittleEndian.getUShort(contents,4); |
| // This (should) include the 8 byte header size |
| int recordLen = record.getRecordSize(); |
| |
| String fmt = ind+"At position %2$d (%2$04x): type is %3$d (%3$04x), len is %4$d (%4$04x) (%5$d) - record claims %6$d"; |
| out.println(String.format(Locale.ROOT, fmt, "", pos, atomType, atomLen, atomLen+8, recordLen)); |
| |
| |
| // Check for corrupt / lying ones |
| if(recordLen != 8 && (recordLen != (atomLen+8))) { |
| out.println(String.format(Locale.ROOT, ind+"** Atom length of $2d ($3d) doesn't match record length of %4d", "", atomLen, atomLen+8, recordLen)); |
| } |
| |
| // Print the record's details |
| String recordStr = record.toString().replace("\n", String.format(Locale.ROOT, "\n"+ind, "")); |
| out.println(String.format(Locale.ROOT, ind+"%2$s", "", recordStr)); |
| |
| if(record instanceof EscherContainerRecord) { |
| walkEscherDDF((indent+3), pos + 8, (int)atomLen ); |
| } |
| |
| // Handle records that seem to lie |
| if(atomType == 61451l) { |
| // Normally claims a size of 8 |
| recordLen = (int)atomLen + 8; |
| } |
| if(atomType == 61453l) { |
| // Returns EscherContainerRecord, but really msofbtClientTextbox |
| recordLen = (int)atomLen + 8; |
| record.fillFields( contents, 0, erf ); |
| if(! (record instanceof EscherTextboxRecord)) { |
| out.println(String.format(Locale.ROOT, ind+"%2$s", "", "** Really a msofbtClientTextbox !")); |
| } |
| } |
| |
| // Decide on what to do, based on how the lenghts match up |
| if(recordLen == 8 && atomLen > 8 ) { |
| // Assume it has children, rather than being corrupted |
| walkEscherDDF((indent+3), pos + 8, (int)atomLen ); |
| |
| // Wind on our length + our header |
| pos += atomLen; |
| pos += 8; |
| len -= atomLen; |
| len -= 8; |
| } else { |
| // No children, wind on our real length |
| pos += atomLen; |
| pos += 8; |
| len -= atomLen; |
| len -= 8; |
| } |
| |
| // Move on to the next one, if we're not at the end yet |
| if(len >= 8) { |
| walkEscherDDF(indent, pos, len ); |
| } |
| } |
| |
| /** |
| * Use the basic record format groking code to walk the Escher records |
| */ |
| public void walkEscherBasic(int indent, int pos, int len) throws IOException { |
| if(len < 8) { return; } |
| |
| final String ind = (indent == 0) ? "%1$s" : "%1$"+indent+"s"; |
| |
| long type = LittleEndian.getUShort(docstream,pos+2); |
| long atomlen = LittleEndian.getUInt(docstream,pos+4); |
| |
| String fmt = ind+"At position %2$d ($2$04x): type is %3$d (%3$04x), len is %4$d (%4$04x)"; |
| out.println(String.format(Locale.ROOT, fmt, "", pos, type, atomlen)); |
| |
| String typeName = RecordTypes.forTypeID((short)type).name(); |
| out.println(String.format(Locale.ROOT, ind+"%2$s", "That's an Escher Record: ", typeName)); |
| |
| // Record specific dumps |
| if(type == 61453l) { |
| // Text Box. Print out first 8 bytes of data, then 8 4 later |
| HexDump.dump(docstream, 0, out, pos+8, 8); |
| HexDump.dump(docstream, 0, out, pos+20, 8); |
| out.println(); |
| } |
| |
| |
| // Blank line before next entry |
| out.println(); |
| |
| // Look in children if we are a container |
| if(type == 61443l || type == 61444l) { |
| walkEscherBasic((indent+3), pos+8, (int)atomlen); |
| } |
| |
| // Keep going if not yet at end |
| if(atomlen < len) { |
| int atomleni = (int)atomlen; |
| walkEscherBasic(indent, pos+atomleni+8, len-atomleni-8); |
| } |
| } |
| } |