Tag as 3.0.1-RC3

git-svn-id: https://svn.apache.org/repos/asf/poi/tags/REL_3_0_1_RC3@551531 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/legal/NOTICE b/legal/NOTICE
index d417a36..190d974 100644
--- a/legal/NOTICE
+++ b/legal/NOTICE
@@ -1,5 +1,16 @@
-Apache Jakarta POI
+Apache POI
 Copyright 2001-2007 The Apache Software Foundation
 
 This product includes software developed by
 The Apache Software Foundation (http://www.apache.org/).
+
+
+Unit testing support is provided by JUnit, under the 
+Common Public License Version 1.0: 
+	http://www.opensource.org/licenses/cpl.php
+See http://www.junit.org/
+
+Small parts of the POI component HDGF are based on VSDump,
+and are under the GNU General Public Licence version 3 (GPL v3):
+	http://gplv3.fsf.org/
+See http://www.gnome.ru/projects/vsdump_en.html
diff --git a/src/documentation/content/xdocs/book.xml b/src/documentation/content/xdocs/book.xml
index 4666d77..a0f10c0 100644
--- a/src/documentation/content/xdocs/book.xml
+++ b/src/documentation/content/xdocs/book.xml
@@ -39,6 +39,7 @@
         <menu-item label="HWPF" href="hwpf/index.html"/>
         <menu-item label="HPSF" href="hpsf/index.html"/>
         <menu-item label="HSLF" href="hslf/index.html"/>
+        <menu-item label="HDGF" href="hdgf/index.html"/>
 		<menu-item label="POI-Ruby" href="poi-ruby.html"/>
         <menu-item label="POI-Utils" href="utils/index.html"/>
         <menu-item label="Download" href="ext:download"/>
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index 3783e84..697395f 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -35,7 +35,7 @@
         <person id="YK" name="Yegor Kozlov" email="yegor@apache.org"/>
     </devs>
 
-        <release version="3.0.1-FINAL" date="2007-06-15">
+        <release version="3.0.1-FINAL" date="2007-07-05">
             <action dev="POI-DEVELOPERS" type="fix">Administrative updates to the Maven POMs, and the release artificat build process</action>
             <action dev="POI-DEVELOPERS" type="fix">23951 - [PATCH] Fix for HSSF setSheetOrder and tab names</action>
             <action dev="POI-DEVELOPERS" type="fix">42524 - [PATCH] Better HSLF support for problem shape groups</action>
@@ -44,6 +44,9 @@
             <action dev="POI-DEVELOPERS" type="add">Additional HSLF support for Title and Slide Master Sheets</action>
             <action dev="POI-DEVELOPERS" type="fix">42474 - [PATCH] Improved HSLF note to slide matching, and a NPE</action>
             <action dev="POI-DEVELOPERS" type="fix">42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching</action>
+            <action dev="POI-DEVELOPERS" type="fix">42667 - [PATCH] Fix for HSLF writing of files with tables</action>
+            <action dev="POI-DEVELOPERS" type="add">Improved way of detecting HSSF cells that contain dates, isADateFormat</action>
+            <action dev="POI-DEVELOPERS" type="add">Initial, read-only support for Visio documents, as HDGF</action>
         </release>
 
         <release version="3.0-FINAL" date="2007-05-18">
diff --git a/src/documentation/content/xdocs/hdgf/book.xml b/src/documentation/content/xdocs/hdgf/book.xml
new file mode 100644
index 0000000..fb37a33
--- /dev/null
+++ b/src/documentation/content/xdocs/hdgf/book.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<!--
+   ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+   ====================================================================
+-->
+<!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" "../dtd/book-cocoon-v10.dtd">
+
+<book software="POI Project"
+    title="HDGF"
+    copyright="@year@ POI Project">
+
+    <menu label="Apache POI">
+        <menu-item label="Top" href="../index.html"/>
+    </menu>
+
+    <menu label="HDGF">
+        <menu-item label="Overview" href="index.html"/>
+	</menu>
+	
+</book>
diff --git a/src/documentation/content/xdocs/hdgf/index.xml b/src/documentation/content/xdocs/hdgf/index.xml
new file mode 100755
index 0000000..f14bb1e
--- /dev/null
+++ b/src/documentation/content/xdocs/hdgf/index.xml
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+   ====================================================================
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
+
+<document>
+    <header>
+        <title>POI-HDGF - Java API To Access Microsoft Visio Format Files</title>
+        <subtitle>Overview</subtitle>
+        <authors>
+            <person name="Nick Burch" email="nick at apache dot org"/>
+        </authors>
+    </header>
+
+    <body>
+        <section>
+            <title>Overview</title>
+
+            <p>HDGF is the POI Project's pure Java implementation of the Visio file format.</p>
+            <p>Currently, HDGF provides a low-level, read-only api for 
+              accessing Visio documents. It also provides a 
+              <link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/">way</link>
+              to extract the textual content from a file.
+            </p>
+			<p>At this time, there is no <em>usermodel</em> api or similar,
+			 only low level access to the streams, chunks and chunk commands.
+			 Users are advised to check the unit tests to see how everything
+			 works. They are also well advised to read the documentation
+			 supplied with 
+			 <link href="http://www.gnome.ru/projects/vsdump_en.html">vsdump</link>
+			 to get a feel for how Visio files are structured.</p>
+			<p>To get a feel for the contents of a file, and to track down
+			 where data of interest is stored, HDGF comes with
+			 <link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/dev/">VSDDumper</link>
+			 to print out the contents of the file. Users should also make
+			 use of 
+			 <link href="http://www.gnome.ru/projects/vsdump_en.html">vsdump</link>
+			 to probe the structure of files.</p>
+            <note> 
+                This code currently lives in the 
+                <link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link> 
+                of the POI SVN repository. 
+				Ensure that you have the scratchpad jar or the scratchpad 
+                build area in your
+				classpath before experimenting with this code.
+			</note>
+
+			<section>
+				<title>Steps required for write support</title>
+				<p>Currently, HDGF is only able to read visio files, it is
+				 not able to write them back out again. We believe the
+				 following are the steps that would need to be taken to
+				 implement it.</p>
+				<ol>
+				 <li>Re-write the decompression support in LZW4HDGF to be
+				  less opaque, and also under the ASL.</li>
+				 <li>Add compression support to the new LZW4HDGF.</li>
+				 <li>Have HDGF just write back the raw bytes it read in, and
+				  have a test to ensure the file is un-changed.</li>
+				 <li>Have HDGF generate the bytes to write out from the
+				  Stream stores, using the compressed data as appropriate,
+				  without re-compressing. Plus test to ensure file is
+				  un-changed.</li>
+				 <li>Have HDGF generate the bytes to write out from the
+				  Stream stores, re-compressing any streams that were 
+                  decompressed. Plus test to ensure file is un-changed.</li>
+				 <li>Have HDGF re-generate the offsets in pointers for the
+				  locations of the streams. Plus test to ensure file is
+				  un-changed.</li>
+				 <li>Have HDGF re-generate the bytes for all the chunks, from
+				  the chunk commands. Tests to ensure the chunks are 
+				  serialized properly, and then that the file is un-changed.</li>
+				 <li>Alter the data of one command, but keep it the same
+				  length, and check visio can open the file when written 
+				  out.</li>
+				 <li>Alter the data of one command, to a new length, and
+				  check that visio can open the file when written out.</li>
+				</ol>
+			</section>
+        </section>
+    </body>
+</document>
diff --git a/src/documentation/content/xdocs/hslf/book.xml b/src/documentation/content/xdocs/hslf/book.xml
index 0eb4f8c..8ccf5c1 100644
--- a/src/documentation/content/xdocs/hslf/book.xml
+++ b/src/documentation/content/xdocs/hslf/book.xml
@@ -20,7 +20,7 @@
 <!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" "../dtd/book-cocoon-v10.dtd">
 
 <book software="POI Project"
-    title="HSSF"
+    title="HSLF"
     copyright="@year@ POI Project">
 
     <menu label="Apache POI">
diff --git a/src/documentation/content/xdocs/hslf/index.xml b/src/documentation/content/xdocs/hslf/index.xml
index 779a279..16a3885 100755
--- a/src/documentation/content/xdocs/hslf/index.xml
+++ b/src/documentation/content/xdocs/hslf/index.xml
@@ -34,12 +34,12 @@
             <title>Overview</title>
 
             <p>HSLF is the POI Project's pure Java implementation of the Powerpoint file format.</p>
-            <p>HSSF provides a way to read powerpoint presentations, and extract text from it.
+            <p>HSLF provides a way to read powerpoint presentations, and extract text from it.
             It also provides some (currently limited) edit capabilities.
             </p>
             <note> 
                 This code currently lives the 
-                <link href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">scratchpad area</link> 
+                <link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link> 
                 of the POI SVN repository. 
 				Ensure that you have the scratchpad jar or the scratchpad 
                 build area in your
diff --git a/src/documentation/content/xdocs/hssf/how-to.xml b/src/documentation/content/xdocs/hssf/how-to.xml
index cc578af..a4ac412 100644
--- a/src/documentation/content/xdocs/hssf/how-to.xml
+++ b/src/documentation/content/xdocs/hssf/how-to.xml
@@ -460,7 +460,7 @@
 <code>/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java</code>,
 and may be called on the command line, or from within your own code.
 The latest version is always available from
-<link href="http://svn.apache.org/repos/asf/jakarta/poi/trunk/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/">subversion</link>.
+<link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/">subversion</link>.
 </p>
 <p>
 <em>This code is currently in the scratchpad section, so you will either
diff --git a/src/documentation/content/xdocs/hwpf/index.xml b/src/documentation/content/xdocs/hwpf/index.xml
index 1268fac..1556869 100644
--- a/src/documentation/content/xdocs/hwpf/index.xml
+++ b/src/documentation/content/xdocs/hwpf/index.xml
@@ -38,7 +38,7 @@
     to pure Java.</p>
 
   <p>HWPF is still in early development. It is in the <link
-     href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">
+     href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
      scratchpad section of the SVN.</link> You will need to ensure you
      either have a recent SVN checkout, or a recent SVN nightly build
      (including the scratchpad jar!)</p>
diff --git a/src/documentation/content/xdocs/hwpf/quick-guide.xml b/src/documentation/content/xdocs/hwpf/quick-guide.xml
index 197922f..bf04625 100644
--- a/src/documentation/content/xdocs/hwpf/quick-guide.xml
+++ b/src/documentation/content/xdocs/hwpf/quick-guide.xml
@@ -30,7 +30,7 @@
 
     <body>
 		<p>HWPF is still in early development. It is in the <link
-     	href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">
+     	href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
 		scratchpad section of the SVN.</link> You will need to ensure you
 		either have a recent SVN checkout, or a recent SVN nightly build
 		(including the scratchpad jar!)</p>
@@ -68,7 +68,7 @@
 		<section><title>Further Examples</title>
 		<p>For now, the best source of additional examples is in the unit 
 		tests. <link
-     	href="http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
+     	href="http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
 		Browse the HWPF unit tests.</link>
 		</p>
 		</section>
diff --git a/src/documentation/content/xdocs/index.xml b/src/documentation/content/xdocs/index.xml
index da2d5ed..5968f80 100644
--- a/src/documentation/content/xdocs/index.xml
+++ b/src/documentation/content/xdocs/index.xml
@@ -38,6 +38,10 @@
 		<link href="http://www.apache.org/dyn/closer.cgi/poi/release/">download</link>
 		the source and binaries from your
 		<link href="http://www.apache.org/dyn/closer.cgi/poi/release/">local mirror</link>.</p>
+      <p>We would also like to confirm that version 3.0 of Apache POI does
+		<em>not</em> contain any viruses. Users of broken virus checkers
+		which do detect a 94 byte file, sci_cec.db, as containing one are
+		advised to contact their vendor for a fix.</p>
 	</section>
 
     <section><title>Purpose</title>
@@ -107,12 +111,19 @@
 	  development. Jump in!</p>
         </section>
         <section><title>HSLF for PowerPoint Documents</title>
-	<p>HWSL is our port of the Microsoft PowerPoint 97(-2003) file format to pure
+	<p>HSLF is our port of the Microsoft PowerPoint 97(-2003) file format to pure
 	  Java. It supports read and write capabilities of some, but not yet all
       of the core records. Please see <link
 	    href="./hslf/index.html">the HSLF project page for more
 	    information</link>.</p>
         </section>
+        <section><title>HDGF for Visio Documents</title>
+	<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
+	  Java. It currently only supports reading at a very low level, and
+      simple text extraction. Please see <link
+	    href="./hdgf/index.html">the HDGF project page for more
+	    information</link>.</p>
+        </section>
         <section><title>HPSF for Document Properties</title>
 	<p>HPSF is our port of the OLE 2 property set format to pure
 	  Java. Property sets are mostly use to store a document's properties
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index ef5c5aa..b236f22 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -32,7 +32,7 @@
     </developers>
 
     <changes>
-        <release version="3.0.1-FINAL" date="2007-06-15">
+        <release version="3.0.1-FINAL" date="2007-07-05">
             <action dev="POI-DEVELOPERS" type="fix">Administrative updates to the Maven POMs, and the release artificat build process</action>
             <action dev="POI-DEVELOPERS" type="fix">23951 - [PATCH] Fix for HSSF setSheetOrder and tab names</action>
             <action dev="POI-DEVELOPERS" type="fix">42524 - [PATCH] Better HSLF support for problem shape groups</action>
@@ -41,6 +41,9 @@
             <action dev="POI-DEVELOPERS" type="add">Additional HSLF support for Title and Slide Master Sheets</action>
             <action dev="POI-DEVELOPERS" type="fix">42474 - [PATCH] Improved HSLF note to slide matching, and a NPE</action>
             <action dev="POI-DEVELOPERS" type="fix">42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching</action>
+            <action dev="POI-DEVELOPERS" type="fix">42667 - [PATCH] Fix for HSLF writing of files with tables</action>
+            <action dev="POI-DEVELOPERS" type="add">Improved way of detecting HSSF cells that contain dates, isADateFormat</action>
+            <action dev="POI-DEVELOPERS" type="add">Initial, read-only support for Visio documents, as HDGF</action>
         </release>
 
         <release version="3.0-FINAL" date="2007-05-18">
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java
index 54c37b3..5928927 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/Chunk.java
@@ -20,6 +20,9 @@
 
 import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition;
 import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.StringUtil;
 
 /**
  * Base of all chunks, which hold data, flags etc
@@ -44,6 +47,9 @@
 	/** The name of the chunk, as found from the commandDefinitions */
 	private String name;
 	
+	/** For logging warnings about the structure of the file */
+	private POILogger logger = POILogFactory.getLogger(Chunk.class);
+	
 	public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) {
 		this.header = header;
 		this.trailer = trailer;
@@ -148,7 +154,9 @@
 			
 			// Check we seem to have enough data
 			if(offset >= contents.length) {
-				System.err.println("Command offset " + offset + " past end of data at " + contents.length);
+				logger.log(POILogger.WARN, 
+						"Command offset " + offset + " past end of data at " + contents.length
+				);
 				continue;
 			}
 		
@@ -167,9 +175,27 @@
 						LittleEndian.getDouble(contents, offset)
 				);
 				break;
+			case 12:
+				// A Little Endian String
+				// Starts 8 bytes into the data segment
+				// Ends at end of data, or 00 00
+				int startsAt = 8;
+				int endsAt = startsAt;
+				for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
+					if(contents[j] == 0 && contents[j+1] == 0) {
+						endsAt = j;
+					}
+				}
+				if(endsAt == startsAt) {
+					endsAt = contents.length;
+				}
+				
+				int strLen = (endsAt-startsAt) / 2;
+				command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen);
+				break;
 			case 25:
 				command.value = new Short(
-						LittleEndian.getShort(contents, offset)
+					LittleEndian.getShort(contents, offset)
 				);
 				break;
 			case 26:
@@ -188,7 +214,8 @@
 				break;
 				
 			default:
-				//System.err.println("Warning - Command of type " + type + " not processed!");
+				logger.log(POILogger.INFO, 
+						"Command of type " + type + " not processed!");
 			}
 			
 			// Add to the array
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkFactory.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkFactory.java
index efac0d3..fe0fc91 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkFactory.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkFactory.java
@@ -24,6 +24,9 @@
 import java.util.Hashtable;
 import java.util.StringTokenizer;
 
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
 /**
  * Factor class to create the appropriate chunks, which
  *  needs the version of the file to process the chunk header
@@ -42,6 +45,9 @@
 	private static String chunkTableName = 
 		"/org/apache/poi/hdgf/chunks/chunks_parse_cmds.tbl";
 	
+	/** For logging problems we spot with the file */
+	private POILogger logger = POILogFactory.getLogger(ChunkFactory.class);
+	
 	public ChunkFactory(int version) throws IOException {
 		this.version = version;
 		
@@ -107,7 +113,8 @@
 		// Check we have enough data, and tweak the header size
 		//  as required
 		if(endOfDataPos > data.length) {
-			System.err.println("Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
+			logger.log(POILogger.WARN,
+				"Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
 			
 			endOfDataPos = data.length;
 			header.length = data.length - offset - header.getSizeInBytes();
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java
index 51eca56..c77a249 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV11.java
@@ -24,6 +24,10 @@
 	 * Does the chunk have a separator?
 	 */
 	public boolean hasSeparator() {
+		// For some reason, there are two types that don't have a 
+		//  separator despite the flags that indicate they do
+		if(type == 0x1f || type == 0xc9) { return false; }
+		
 		// If there's a trailer, there's a separator
 		if(hasTrailer()) { return true; }
 
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java
index 7098f17..5ce4097 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkSeparator.java
@@ -27,4 +27,8 @@
 		separatorData = new byte[4];
 		System.arraycopy(data, offset, separatorData, 0, 4);
 	}
+	
+	public String toString() {
+		return "<ChunkSeparator of length " + separatorData.length + ">";
+	}
 }
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java
index a610b49..a590732 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkTrailer.java
@@ -26,4 +26,8 @@
 		trailerData = new byte[8];
 		System.arraycopy(data, offset, trailerData, 0, 8);
 	}
+	
+	public String toString() {
+		return "<ChunkTrailer of length " + trailerData.length + ">";
+	}
 }
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java b/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java
index 3c20e4f..614b925 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/dev/VSDDumper.java
@@ -70,6 +70,11 @@
 				" - " + Integer.toHexString(ptr.getFormat()));
 		System.out.println(ind + "  Length is\t" + ptr.getLength() +
 				" - " + Integer.toHexString(ptr.getLength()));
+		if(ptr.destinationCompressed()) {
+			int decompLen = stream._getContentsLength();
+			System.out.println(ind + "  DC.Length is\t" + decompLen +
+					" - " + Integer.toHexString(decompLen));
+		}
 		System.out.println(ind + "  Compressed is\t" + ptr.destinationCompressed());
 		System.out.println(ind + "  Stream is\t" + stream.getClass().getName());
 		
@@ -100,6 +105,9 @@
 			for(int i=0; i<cs.getChunks().length; i++) {
 				Chunk chunk = cs.getChunks()[i];
 				System.out.println(ind2 + "" + chunk.getName());
+				System.out.println(ind2 + "  Length is " + chunk._getContents().length + " (" + Integer.toHexString(chunk._getContents().length) + ")");
+				System.out.println(ind2 + "  OD Size is " + chunk.getOnDiskSize() + " (" + Integer.toHexString(chunk.getOnDiskSize()) + ")");
+				System.out.println(ind2 + "  T / S is " + chunk.getTrailer() + " / " + chunk.getSeparator());
 				System.out.println(ind2 + "  Holds " + chunk.getCommands().length + " commands");
 				for(int j=0; j<chunk.getCommands().length; j++) {
 					Command command = chunk.getCommands()[j];
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
new file mode 100644
index 0000000..b2c4ee3
--- /dev/null
+++ b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
@@ -0,0 +1,114 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf.extractor;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.chunks.Chunk.Command;
+import org.apache.poi.hdgf.streams.ChunkStream;
+import org.apache.poi.hdgf.streams.PointerContainingStream;
+import org.apache.poi.hdgf.streams.Stream;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Class to find all the text in a Visio file, and return it.
+ * Can operate on the command line (outputs to stdout), or
+ *  can return the text for you (eg for use with Lucene).
+ */
+public class VisioTextExtractor {
+	private HDGFDiagram hdgf;
+	private POIFSFileSystem fs;
+
+	public VisioTextExtractor(HDGFDiagram hdgf) {
+		this.hdgf = hdgf;
+	}
+	public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
+		this(new HDGFDiagram(fs));
+		this.fs = fs;
+	}
+	public VisioTextExtractor(InputStream inp) throws IOException {
+		this(new POIFSFileSystem(inp));
+	}
+	
+	/**
+	 * Locates all the text entries in the file, and returns their
+	 *  contents.
+	 */
+	public String[] getAllText() {
+		ArrayList text = new ArrayList();
+		for(int i=0; i<hdgf.getTopLevelStreams().length; i++) {
+			findText(hdgf.getTopLevelStreams()[i], text);
+		}
+		return (String[])text.toArray( new String[text.size()] );
+	}
+	private void findText(Stream stream, ArrayList text) {
+		if(stream instanceof PointerContainingStream) {
+			PointerContainingStream ps = (PointerContainingStream)stream;
+			for(int i=0; i<ps.getPointedToStreams().length; i++) {
+				findText(ps.getPointedToStreams()[i], text);
+			}
+		}
+		if(stream instanceof ChunkStream) {
+			ChunkStream cs = (ChunkStream)stream;
+			for(int i=0; i<cs.getChunks().length; i++) {
+				if(cs.getChunks()[i] != null && 
+						cs.getChunks()[i].getName() != null &&
+						cs.getChunks()[i].getName().equals("Text")) {
+					// First command
+					Command cmd = cs.getChunks()[i].getCommands()[0];
+					if(cmd != null && cmd.getValue() != null) {
+						text.add( cmd.getValue().toString() );
+					}
+				}
+			}
+		}
+	}
+	
+	/**
+	 * Returns the textual contents of the file.
+	 */
+	public String getText() {
+		StringBuffer text = new StringBuffer();
+		String[] allText = getAllText();
+		for(int i=0; i<allText.length; i++) {
+			text.append(allText[i]);
+			if(!allText[i].endsWith("\r") &&
+					!allText[i].endsWith("\n")) {
+				text.append("\n");
+			}
+		}
+		return text.toString();
+	}
+	
+	public static void main(String[] args) throws Exception {
+		if(args.length == 0) {
+			System.err.println("Use:");
+			System.err.println("   VisioTextExtractor <file.vsd>");
+			System.exit(1);
+		}
+		
+		VisioTextExtractor extractor = 
+			new VisioTextExtractor(new FileInputStream(args[0]));
+		
+		// Print not PrintLn as already has \n added to it
+		System.out.print(extractor.getText());
+	}
+}
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
index 75b6bee..a59fe43 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java
@@ -43,6 +43,11 @@
 	public void findChunks() {
 		ArrayList chunksA = new ArrayList();
 		
+		if(getPointer().getOffset() == 0x64b3) {
+			int i = 0;
+			i++;
+		}
+		
 		int pos = 0;
 		byte[] contents = getStore().getContents();
 		while(pos < contents.length) {
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java
index 35aa7e5..163fa83 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/Stream.java
@@ -83,7 +83,7 @@
 			return new ChunkStream(pointer, store, chunkFactory); 
 		}
 		else if(pointer.destinationHasStrings()) {
-			return new StringsStream(pointer, store);
+			return new StringsStream(pointer, store, chunkFactory);
 		}
 		
 		// Give up and return a generic one
diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java
index 2688b15..b23ff92 100644
--- a/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java
+++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/StringsStream.java
@@ -16,13 +16,16 @@
 ==================================================================== */
 package org.apache.poi.hdgf.streams;
 
+import org.apache.poi.hdgf.chunks.ChunkFactory;
 import org.apache.poi.hdgf.pointers.Pointer;
 
 /**
- * A Stream which holds Strings
+ * A Stream which holds Strings. This is just another kind
+ *  of ChunkStream, it seems
  */
 public class StringsStream extends Stream {
-	protected StringsStream(Pointer pointer, StreamStore store) {
+	protected StringsStream(Pointer pointer, StreamStore store, ChunkFactory chunkFactory) {
 		super(pointer, store);
+//		super(pointer, store, chunkFactory);
 	}
 }
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
new file mode 100644
index 0000000..a6541e9
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
@@ -0,0 +1,107 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hdgf.extractor;
+
+import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
+import java.io.PrintStream;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.chunks.Chunk;
+import org.apache.poi.hdgf.chunks.ChunkFactory;
+import org.apache.poi.hdgf.pointers.Pointer;
+import org.apache.poi.hdgf.pointers.PointerFactory;
+import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests that VisioTextExtractor can be created in each of its
+ *  three ways, and that it pulls the expected text from the test file
+ */
+public class TestVisioExtractor extends TestCase {
+	private String filename;
+	protected void setUp() throws Exception {
+		String dirname = System.getProperty("HDGF.testdata.path");
+		filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
+	}
+	
+	/**
+	 * Test the 3 different ways of creating one
+	 */
+	public void testCreation() throws Exception {
+		checkCreated(new VisioTextExtractor(new FileInputStream(filename)));
+		checkCreated(new VisioTextExtractor(
+			new POIFSFileSystem(new FileInputStream(filename))
+		));
+		checkCreated(new VisioTextExtractor(
+			new HDGFDiagram(new POIFSFileSystem(new FileInputStream(filename)))
+		));
+	}
+	
+	/**
+	 * Checks that a freshly created extractor finds all 3 bits of text
+	 */
+	private void checkCreated(VisioTextExtractor extractor) throws Exception {
+		assertNotNull(extractor);
+		assertNotNull(extractor.getAllText());
+		assertEquals(3, extractor.getAllText().length);
+	}
+	
+	/**
+	 * Checks the text, both as an array and as a single string
+	 */
+	public void testExtraction() throws Exception {
+		VisioTextExtractor extractor =
+			new VisioTextExtractor(new FileInputStream(filename));
+		
+		// Check the array fetch
+		String[] text = extractor.getAllText();
+		assertNotNull(text);
+		assertEquals(3, text.length);
+		assertEquals("Test View\n", text[0]);
+		assertEquals("I am a test view\n", text[1]);
+		assertEquals("Some random text, on a page\n", text[2]);
+		
+		// And the all-in fetch
+		String textS = extractor.getText();
+		assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
+	}
+	
+	/**
+	 * Runs main() with System.out captured, and checks what it printed
+	 */
+	public void testMain() throws Exception {
+		PrintStream oldOut = System.out;
+		ByteArrayOutputStream baos = new ByteArrayOutputStream();
+		PrintStream capture = new PrintStream(baos);
+		System.setOut(capture);
+		try {
+			VisioTextExtractor.main(new String[] {filename});
+		} finally {
+			// Put things back, even if main() threw, so System.out is never left captured
+			System.setOut(oldOut);
+		}
+		
+		// Check
+		capture.flush();
+		String text = baos.toString();
+		assertEquals("Test View\nI am a test view\nSome random text, on a page\n", text);
+	}
+}
diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java
index c2d03f0..5ea21d1 100644
--- a/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java
+++ b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamComplex.java
@@ -18,6 +18,7 @@
 
 import java.io.FileInputStream;
 
+import org.apache.poi.hdgf.chunks.Chunk;
 import org.apache.poi.hdgf.chunks.ChunkFactory;
 import org.apache.poi.hdgf.pointers.Pointer;
 import org.apache.poi.hdgf.pointers.PointerFactory;
@@ -202,4 +203,63 @@
 		assertTrue(s8451.getPointedToStreams()[0] instanceof StringsStream);
 		assertTrue(s8451.getPointedToStreams()[1] instanceof StringsStream);
 	}
+	
+	/**
+	 * Follows the trailer down to the ChunkStream at 0x64b3 (the
+	 *  first child of the stream at 0x7194), and checks the text
+	 *  held in two of its "Text" chunks
+	 */
+	public void testChunkWithText() throws Exception {
+		// Build the trailer, and have it find all of its children
+		Pointer trailerPointer = ptrFactory.createPointer(contents, trailerPointerAt);
+		TrailerStream trailer = (TrailerStream)
+			Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory);
+		trailer.findChildren(contents);
+		
+		assertNotNull(trailer.getChildPointers());
+		assertNotNull(trailer.getPointedToStreams());
+		assertEquals(20, trailer.getChildPointers().length);
+		assertEquals(20, trailer.getPointedToStreams().length);
+		
+		// The parent of our ChunkStream is child 13 of the trailer
+		final int parentAt = 0x7194;
+		assertEquals(parentAt, trailer.getChildPointers()[13].getOffset());
+		assertEquals(parentAt, trailer.getPointedToStreams()[13].getPointer().getOffset());
+		PointerContainingStream parent = (PointerContainingStream)trailer.getPointedToStreams()[13];
+		
+		// And the ChunkStream itself is the parent's first child
+		final int chunkStreamAt = 0x64b3;
+		assertEquals(chunkStreamAt, parent.getChildPointers()[0].getOffset());
+		assertEquals(chunkStreamAt, parent.getPointedToStreams()[0].getPointer().getOffset());
+		ChunkStream chunkStream = (ChunkStream)parent.getPointedToStreams()[0];
+		
+		// Should be 26bc bytes un-compressed, holding 131 chunks
+		assertEquals(0x26bc, chunkStream.getStore().getContents().length);
+		Chunk[] chunks = chunkStream.getChunks();
+		assertEquals(131, chunks.length);
+		
+		// At least one of those chunks must be a Text one
+		boolean foundText = false;
+		int n = 0;
+		while(n < chunks.length) {
+			foundText |= chunks[n].getName().equals("Text");
+			n++;
+		}
+		assertTrue(foundText);
+		
+		// Chunk 72 is Text, and contains our text
+		assertTextChunk(chunks, 72, "Test View\n");
+		// Almost at the end is some more text
+		assertTextChunk(chunks, 128, "Some random text, on a page\n");
+	}
+	
+	/**
+	 * Checks that the chunk at the given index is a "Text" one,
+	 *  with a single command holding the expected value
+	 */
+	private void assertTextChunk(Chunk[] chunks, int index, String expected) {
+		assertEquals("Text", chunks[index].getName());
+		assertEquals(1, chunks[index].getCommands().length);
+		assertEquals(expected, chunks[index].getCommands()[0].getValue());
+	}
 }