Parse PPDrawing more deterministically

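The [MS-PPT] specification defines the exact layout of the PPDrawing record (which it calls a DrawingContainer), so the record can be parsed deterministically instead of by a generic recursive walk over its Escher children, which makes for clearer code.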

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887396 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java
index de65b23..396fbc0 100644
--- a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java
@@ -19,8 +19,8 @@
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -46,11 +46,8 @@
 import org.apache.poi.ddf.EscherTextboxRecord;
 import org.apache.poi.sl.usermodel.ShapeType;
 import org.apache.poi.util.GenericRecordUtil;
-import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.LittleEndian;
 
-import static org.apache.logging.log4j.util.Unbox.box;
-
 /**
  * These are actually wrappers onto Escher drawings. Make use of
  *  the DDF classes to do useful things with them.
@@ -58,23 +55,22 @@
  *  PowerPoint (hslf) records found within the EscherTextboxRecord
  *  (msofbtClientTextbox) records.
  * Also provides easy access to the EscherTextboxRecords, so that their
- *  text may be extracted and used in Sheets
+ *  text may be extracted and used in Sheets.
+ * <p>
+ * {@code [MS-PPT] - v20210216} refers to this as a {@code DrawingContainer}.
  */
 
 // For now, pretending to be an atom. Might not always be, but that
 //  would require a wrapping class
 public final class PPDrawing extends RecordAtom implements Iterable<EscherRecord> {
 
-	//arbitrarily selected; may need to increase
-	private static final int MAX_RECORD_LENGTH = 10_485_760;
-
-
-	private byte[] _header;
+	private final byte[] _header;
 	private long _type;
 
-	private final List<EscherRecord> childRecords = new ArrayList<>();
 	private EscherTextboxWrapper[] textboxWrappers;
 
+	private final EscherContainerRecord dgContainer = new EscherContainerRecord();
+
 	//cached EscherDgRecord
 	private EscherDgRecord dg;
 
@@ -82,11 +78,11 @@
 	 * Get access to the underlying Escher Records
 	 */
 	@SuppressWarnings("WeakerAccess")
-	public List<EscherRecord> getEscherRecords() { return childRecords; }
+	public List<EscherRecord> getEscherRecords() { return Collections.singletonList(dgContainer); }
 
 	@Override
 	public Iterator<EscherRecord> iterator() {
-		return childRecords.iterator();
+		return getEscherRecords().iterator();
 	}
 
 	/**
@@ -121,26 +117,17 @@
 		// Get the type
 		_type = LittleEndian.getUShort(_header,2);
 
-		// Get the contents for now
-		final byte[] contents = IOUtils.safelyClone(source, start, len, MAX_RECORD_LENGTH);
-
 		// Build up a tree of Escher records contained within
 		final DefaultEscherRecordFactory erf = new HSLFEscherRecordFactory();
-		findEscherChildren(erf, contents, 8, len-8, childRecords);
-		EscherContainerRecord dgContainer = getDgContainer();
+		dgContainer.fillFields(source, start + 8, erf);
+		assert dgContainer.getRecordId() == EscherRecordTypes.DG_CONTAINER.typeID;
+		dg = dgContainer.getChildById(EscherRecordTypes.DG.typeID);
 
-		if (dgContainer != null) {
-			textboxWrappers = Stream.of(dgContainer).
-				flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)).
-				flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)).
-				flatMap(PPDrawing::getTextboxHelper).
-				toArray(EscherTextboxWrapper[]::new);
-		} else {
-			// Find and EscherTextboxRecord's, and wrap them up
-			final List<EscherTextboxWrapper> textboxes = new ArrayList<>();
-			findEscherTextboxRecord(childRecords, textboxes);
-			this.textboxWrappers = textboxes.toArray(new EscherTextboxWrapper[0]);
-		}
+		textboxWrappers = Stream.of(dgContainer).
+			flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)).
+			flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)).
+			flatMap(PPDrawing::getTextboxHelper).
+			toArray(EscherTextboxWrapper[]::new);
 	}
 
 	private static Stream<EscherTextboxWrapper> getTextboxHelper(EscherContainerRecord spContainer) {
@@ -186,66 +173,6 @@
 	}
 
 	/**
-	 * Tree walking way of finding Escher Child Records
-	 */
-	private void findEscherChildren(DefaultEscherRecordFactory erf, byte[] source, int startPos, int lenToGo, List<EscherRecord> found) {
-
-		int escherBytes = LittleEndian.getInt( source, startPos + 4 ) + 8;
-
-		// Find the record
-		EscherRecord r = erf.createRecord(source,startPos);
-		// Fill it in
-		r.fillFields( source, startPos, erf );
-		// Save it
-		found.add(r);
-
-		// Wind on
-		int size = r.getRecordSize();
-		if(size < 8) {
-			LOG.atWarn().log("Hit short DDF record at {} - {}", box(startPos),box(size));
-		}
-
-		/*
-		 * Sanity check. Always advance the cursor by the correct value.
-		 *
-		 * getRecordSize() must return exactly the same number of bytes that was written in fillFields.
-		 * Sometimes it is not so, see an example in bug #44770. Most likely reason is that one of ddf records calculates wrong size.
-		 */
-		if(size != escherBytes){
-			LOG.atWarn().log("Record length={} but getRecordSize() returned {}; record: {}", box(escherBytes),box(r.getRecordSize()),r.getClass());
-			size = escherBytes;
-		}
-		startPos += size;
-		lenToGo -= size;
-		if(lenToGo >= 8) {
-			findEscherChildren(erf, source, startPos, lenToGo, found);
-		}
-	}
-
-	/**
-	 * Look for EscherTextboxRecords
-	 */
-	private void findEscherTextboxRecord(List<EscherRecord> toSearch, List<EscherTextboxWrapper> found) {
-	    EscherSpRecord sp = null;
-	    for (EscherRecord r : toSearch) {
-	        if (r instanceof EscherSpRecord) {
-	            sp = (EscherSpRecord)r;
-	        } else if (r instanceof EscherTextboxRecord) {
-				EscherTextboxRecord tbr = (EscherTextboxRecord)r;
-				EscherTextboxWrapper w = new EscherTextboxWrapper(tbr);
-				if (sp != null) {
-				    w.setShapeId(sp.getShapeId());
-				}
-				found.add(w);
-			} else if (r.isContainerRecord()) {
-				// If it has children, walk them
-				List<EscherRecord> children = r.getChildRecords();
-				findEscherTextboxRecord(children,found);
-			}
-		}
-	}
-
-	/**
 	 * We are type 1036
 	 */
 	public long getRecordType() { return _type; }
@@ -268,9 +195,7 @@
 
 		// Find the new size of the escher children;
 		int newSize = 0;
-		for(EscherRecord er : childRecords) {
-			newSize += er.getRecordSize();
-		}
+		newSize += dgContainer.getRecordSize();
 
 		// Update the size (header bytes 5-8)
 		LittleEndian.putInt(_header,4,newSize);
@@ -281,9 +206,7 @@
 		// Now grab the children's data
 		byte[] b = new byte[newSize];
 		int done = 0;
-		for(EscherRecord r : childRecords) {
-		    done += r.serialize( done, b );
-		}
+		dgContainer.serialize(done, b);
 
 		// Finally, write out the children
 		out.write(b);
@@ -293,7 +216,6 @@
 	 * Create the Escher records associated with a new PPDrawing
 	 */
 	private void create(){
-		EscherContainerRecord dgContainer = new EscherContainerRecord();
 		dgContainer.setRecordId( EscherContainerRecord.DG_CONTAINER );
 		dgContainer.setOptions((short)15);
 
@@ -342,8 +264,6 @@
 		spContainer.addChildRecord(opt);
 
 		dgContainer.addChildRecord(spContainer);
-
-		childRecords.add(dgContainer);
 	}
 
 	/**
@@ -362,7 +282,7 @@
 	 * @since POI 3.14-Beta2
 	 */
 	public EscherContainerRecord getDgContainer() {
-		return (EscherContainerRecord)firstEscherRecord(this, EscherRecordTypes.DG_CONTAINER).orElse(null);
+		return dgContainer;
 	}
 
 	/**
@@ -371,24 +291,17 @@
 	 * @return EscherDgRecord
 	 */
 	public EscherDgRecord getEscherDgRecord(){
-		if (dg == null) {
-			firstEscherRecord(this, EscherRecordTypes.DG_CONTAINER).
-			flatMap(c -> firstEscherRecord((EscherContainerRecord)c, EscherRecordTypes.DG)).
-			ifPresent(c -> dg = (EscherDgRecord)c);
-		}
 		return dg;
 	}
 
     public StyleTextProp9Atom[] getNumberedListInfo() {
-		EscherContainerRecord dgContainer = getDgContainer();
-
-		return (dgContainer == null) ? new StyleTextProp9Atom[0] : Stream.of(dgContainer).
-			flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)).
-			flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)).
-			map(PPDrawing::findInSpContainer).
-			filter(Optional::isPresent).
-			map(Optional::get).
-			toArray(StyleTextProp9Atom[]::new);
+		return Stream.of(dgContainer).
+					flatMap(findEscherContainer(EscherRecordTypes.SPGR_CONTAINER)).
+					flatMap(findEscherContainer(EscherRecordTypes.SP_CONTAINER)).
+					map(PPDrawing::findInSpContainer).
+					filter(Optional::isPresent).
+					map(Optional::get).
+					toArray(StyleTextProp9Atom[]::new);
     }
 
 	@Override