AOO410/main/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 package org.openoffice.xmerge.converter.xml.sxw.wordsmith;

 import org.w3c.dom.*;

 import java.io.IOException;
 import java.util.Enumeration;

 import org.openoffice.xmerge.Document;
 import org.openoffice.xmerge.ConvertData;
 import org.openoffice.xmerge.ConvertException;
 import org.openoffice.xmerge.DocumentDeserializer;
 import org.openoffice.xmerge.converter.xml.OfficeConstants;
 import org.openoffice.xmerge.converter.palm.PalmDB;
 import org.openoffice.xmerge.converter.palm.Record;
 import org.openoffice.xmerge.converter.palm.PdbDecoder;
 import org.openoffice.xmerge.converter.palm.PalmDocument;
 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;

 import java.util.Vector;
 import java.io.ByteArrayInputStream;

 import org.openoffice.xmerge.converter.xml.*;
 import org.openoffice.xmerge.util.Debug;
 import org.openoffice.xmerge.util.XmlUtil;

 /**
  *  <p>WordSmith implementation of
  *  org.openoffice.xmerge.DocumentDeserializer
  *  for the {@link
  *  org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
  *  PluginFactoryImpl}.</p>
  *
  *  The <code>deserialize</code> method uses a
  *  <code>WSDecoder</code> to parse the WordSmith records into an
  *  array of <code>Wse</code> elements, then it calls
  *  <code>buildDocument</code> to create a <code>SxwDocument</code>
  *  object from them.
  *
  *  @author      Herbie Ong, David Proulx
  */
 public final class DocumentDeserializerImpl
 implements DOCConstants, OfficeConstants, DocumentDeserializer {

     /**
      *  Decoder for the WordSmith format.  A fresh instance is created
      *  for every input document processed by <code>deserialize</code>.
      */
     private WSDecoder decoder = null;

     /** Last font table element seen while building a document. */
     WseFontTable fontTable = null;
     /** Last color table element seen while building a document. */
     WseColorTable colorTable = null;
     /** Catalog of styles newly created while building a document. */
     StyleCatalog styleCat = null;
     /**
      *  Catalog of styles read from the original document, if one was
      *  supplied; otherwise it stays empty.
      */
     StyleCatalog oldStyleCat = null;

     /**
      *  The <code>ConvertData</code> handed to the constructor; used by
      *  the no-argument <code>deserialize</code> method.
      */
     private ConvertData cd = null;


     /**
      *  Creates a deserializer bound to the given
      *  <code>ConvertData</code>.
      *
      *  @param  cd  The <code>ConvertData</code> that the no-argument
      *              <code>deserialize</code> method reads its input
      *              documents from.
      */
     public DocumentDeserializerImpl(ConvertData cd) {
         this.cd = cd;
     }


     /**
      *  Deserializes the <code>ConvertData</code> supplied at
      *  construction time, without any original document to take
      *  style information from.
      *
      *  @return  The <code>Document</code> built from the input data.
      *
      *  @throws  ConvertException  If any conversion error occurs.
      *  @throws  IOException       If any I/O error occurs.
      */
     public Document deserialize() throws ConvertException,
         IOException {
         // No original document is available on this code path.
         final Document noOriginal = null;
         return deserialize(noOriginal, cd);
     }


     /**
      *  Decodes every <code>PalmDocument</code> found in the given
      *  <code>ConvertData</code> and builds an office document from it.
      *  Each input document replaces the previous result, so only the
      *  document built from the last one is returned.
      *
      *  @param  origDoc  Original document passed on to
      *                   <code>buildDocument</code>; may be
      *                   <code>null</code>.
      *  @param  cd       The data to deserialize.
      *
      *  @return  The document built from the last input document, or
      *           <code>null</code> if there was no input document.
      *
      *  @throws  IOException  If any I/O error occurs.
      */
     public Document deserialize(Document origDoc, ConvertData cd)
     throws IOException {

         Document result = null;

         for (Enumeration docs = cd.getDocumentEnumeration();
              docs.hasMoreElements(); ) {
             PalmDocument current = (PalmDocument) docs.nextElement();
             Record[] records = current.getPdb().getRecords();

             // A new decoder is used for each input document.
             decoder = new WSDecoder();
             Wse[] elements = decoder.parseDocument(records);

             result = buildDocument(current.getName(), elements, origDoc);
         }
         return result;
     }


     /**
      *  Fills <code>oldStyleCat</code> with the styles found in an
      *  existing document so they can be reused as a starting point.
      *  The document is written to a byte buffer and read back as an
      *  <code>SxwDocument</code>; the first styles, automatic-styles
      *  and master-styles elements of its content DOM are then added
      *  to the catalog.  Any exception is logged and otherwise ignored.
      *
      *  @param  parentDoc  The document to read the styles from.
      */
     private void readStyleCatalog(Document parentDoc) {
         try {
             java.io.ByteArrayOutputStream buffer =
                 new java.io.ByteArrayOutputStream();
             parentDoc.write(buffer);
             SxwDocument oldDoc = new SxwDocument("old");
             oldDoc.read(new ByteArrayInputStream(buffer.toByteArray()));
             org.w3c.dom.Document dom = oldDoc.getContentDOM();

             // Paragraph-family styles are registered twice: once as
             // ParaStyle and once as TextStyle.
             String[] families = { "text", "paragraph", "paragraph" };
             Class[] classes = {
                 TextStyle.class, ParaStyle.class, TextStyle.class
             };

             String[] sections = {
                 TAG_OFFICE_STYLES,
                 TAG_OFFICE_AUTOMATIC_STYLES,
                 TAG_OFFICE_MASTER_STYLES
             };
             for (int s = 0; s < sections.length; s++) {
                 NodeList found = dom.getElementsByTagName(sections[s]);
                 oldStyleCat.add(found.item(0), families, classes, null, false);
             }

         } catch (Exception e) {
             Debug.log(Debug.ERROR, "", e);
         }

     }


     /**
      *  Looks for the single paragraph style in <code>paraStyles</code>
      *  whose name equals the name of a paragraph-family style in the
      *  old style catalog that matches the text formatting of
      *  <code>tStyle</code>.
      *
      *  @param  paraStyles  Candidate paragraph <code>Style</code>
      *                      objects.
      *  @param  tStyle      Text <code>Style</code> to match.
      *
      *  @return  The matching paragraph <code>Style</code>, or
      *           <code>null</code> if there is no match or more than
      *           one match.
      */
     private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) {
         Style[] candidates = oldStyleCat.getMatching(tStyle);
         int hits = 0;
         int lastHit = -1;

         for (int c = 0; c < candidates.length; c++) {
             TextStyle candidate = (TextStyle) candidates[c];

             // Only paragraph-family entries can name a paragraph style.
             if (candidate.getFamily().equals("paragraph")) {
                 for (int p = 0; p < paraStyles.length; p++) {
                     if (candidate.getName().equals(paraStyles[p].getName())) {
                         hits++;
                         lastHit = p;
                     }
                 }
             }
         }

         return (hits == 1) ? (ParaStyle) paraStyles[lastHit] : null;
     }


     /**
      *  Take a <code>String</code> of text and turn it into a sequence
      *  of <code>Node</code> objects.  Plain text becomes text nodes,
      *  every tab character becomes a tab-stop element, and every run
      *  of two or more spaces becomes a space element carrying the
      *  length of the run.  Single spaces stay inside the text nodes.
      *
      *  @param  text       <code>String</code> of text; <code>null</code>
      *                     is treated like an empty string.
      *  @param  parentDoc  Parent <code>Document</code> used to create
      *                     the nodes.
      *
      *  @return  Array of <code>Node</code> objects, possibly empty.
      */
     private Node[] parseText(String text, org.w3c.dom.Document parentDoc) {
         if (text == null)
             return new Node[0];

         Vector nodeVec = new Vector();

         // Break up the text from the WordSmith text run into Open
         // Office text runs.  There may be more runs in OO because
         // tabs and runs of 2 or more spaces map to nodes.
         //
         // The loop ends only when neither a tab nor a space run is
         // left.  (The former loop condition compared the tab index
         // with 1 instead of -1 and so skipped a tab at index 1 when
         // no space run was present.)
         while (true) {

             // Find the indices of tabs and multiple spaces, and
             // figure out which of them occurs first in the string.
             int spaceIndex = text.indexOf("  ");
             int tabIndex = text.indexOf('\t');
             if ((spaceIndex == -1) && (tabIndex == -1))
                 break;

             int closerIndex;  // Index of the first of these
             if (spaceIndex == -1)
                 closerIndex = tabIndex;
             else if (tabIndex == -1)
                 closerIndex = spaceIndex;
             else
                 closerIndex = Math.min(spaceIndex, tabIndex);

             // If there is any text prior to the first occurrence of a
             // tab or spaces, create a text node from it, then chop it
             // off the string we're working with.
             if (closerIndex > 0) {
                 String beginningText = text.substring(0, closerIndex);
                 nodeVec.addElement(parentDoc.createTextNode(beginningText));
                 log("<TEXT>");
                 log(beginningText);
                 log("</TEXT>");
             }
             text = text.substring(closerIndex);

             // Handle either tab character or space sequence by creating
             // an element for it, and then chopping out the text that
             // represented it in "text".
             if (closerIndex == tabIndex) {
                 nodeVec.addElement(parentDoc.createElement(TAG_TAB_STOP));
                 text = text.substring(1);  // tab is always a single character
                 log("<TAB/>");
             } else {
                 // Compute length of space sequence; it is at least 2.
                 int nrSpaces = 2;
                 while ((nrSpaces < text.length())
                 && (text.charAt(nrSpaces) == ' '))
                     nrSpaces++;

                 Element spaceNode = parentDoc.createElement(TAG_SPACE);
                 spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
                                        Integer.toString(nrSpaces));
                 nodeVec.addElement(spaceNode);
                 text = text.substring(nrSpaces);
                 log("<SPACE count=\"" + nrSpaces + "\" />");
             }
         }

         // No more tabs or space sequences.  If there's any remaining
         // text create a text node for it.
         if (text.length() > 0) {
             nodeVec.addElement(parentDoc.createTextNode(text));
             log("<TEXT>");
             log(text);
             log("</TEXT>");
         }

         // Now create and populate an array to return the nodes in.
         Node nodes[] = new Node[nodeVec.size()];
         nodeVec.copyInto(nodes);
         return nodes;
     }


     /**
      *  Builds an <code>SxwDocument</code> from the elements of a
      *  parsed WordSmith document.
      *
      *  <p>Paragraph and text run elements are turned into paragraph
      *  and span nodes below the office body.  Styles are looked up in
      *  the catalog read from the original document first, then in
      *  the catalog of styles created during this run; a new style is
      *  created when neither yields a usable match.  Finally the
      *  style and font sections of the original document (or two
      *  default font declarations when there is none) and the newly
      *  created styles are added to the document.</p>
      *
      *  @param  docName  <code>Document</code> name.
      *  @param  data     WordSmith elements in document order.
      *  @param  origDoc  Original document to take existing styles and
      *                   font declarations from; may be
      *                   <code>null</code>.
      *
      *  @return  Resulting <code>SxwDocument</code> object.
      *
      *  @throws  IOException  If any I/O error occurs.
      */
     private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc)
     throws IOException {

         // create minimum office xml document.
         SxwDocument sxwDoc = new SxwDocument(docName);
         sxwDoc.initContentDOM();

         org.w3c.dom.Document doc = sxwDoc.getContentDOM();

         // Grab hold of the office:body tag,
         // Assume there should be one.
         // This is where top level paragraphs will append to.
         NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
         Node bodyNode = list.item(0);

         styleCat = new StyleCatalog(50);
         oldStyleCat = new StyleCatalog(50);
         if (origDoc != null)
             readStyleCatalog(origDoc);

         Element currPara = null;
         ParaStyle currParaStyle = null;
         int newTextStyleNr = 0;
         int newParaStyleNr = 0;

         // Now write out the document body by running through
         // the list of WordSmith elements and processing each one
         // in turn.
         for (int i = 0; i < data.length; i++) {

             if (data[i].getClass() == WsePara.class) {

                 currPara = doc.createElement(TAG_PARAGRAPH);
                 log("</PARA>");
                 log("<PARA>");

                 WsePara p = (WsePara)data[i];

                 // Save info about the first text run, if there is one.
                 WseTextRun firstTextRun = null;

                 if ((data.length >= i + 2)
                 && (data[i+1].getClass() == WseTextRun.class))
                     firstTextRun = (WseTextRun)data[i+1];

                 Style matches[] = oldStyleCat.getMatching(p.makeStyle());

                 // See if we can find a unique match in the catalog
                 // of existing styles from the original document.
                 ParaStyle pStyle = null;
                 if (matches.length == 1) {
                     pStyle = (ParaStyle)matches[0];
                     log("using an existing style");
                 } else if ((matches.length > 1) && (firstTextRun != null)) {
                     pStyle = matchParaByText(matches, firstTextRun.makeStyle());
                     // Only claim success when a style was really found.
                     if (pStyle != null)
                         log("resolved a para by looking @ text");
                 }

                 // If nothing found so far, try looking in the catalog
                 // of newly-created styles.
                 // DJP FIXME: if we need to add two para styles with the
                 // same para formatting info but different default text
                 // styles, this won't work!
                 if (pStyle == null) {
                     log("had " + matches.length + " matches in old catalog");
                     matches = styleCat.getMatching(p.makeStyle());
                     if (matches.length == 0) {
                         pStyle = p.makeStyle();
                         pStyle.setName("PPP" + ++newParaStyleNr);
                         styleCat.add(pStyle);
                         // DJP: write in the text format info here
                         log("created a new style");
                     } else if (matches.length == 1) {
                         pStyle = (ParaStyle)matches[0];
                         log("re-using a new style");
                     } else if (firstTextRun != null) {
                         pStyle = matchParaByText(matches, firstTextRun.makeStyle());
                         if (pStyle != null) {
                             log("resolved a (new) para by looking @ text");
                         } else
                             log("Hey this shouldn't happen! - nr of matches is "
                             + matches.length);
                     }
                 }

                 // Still nothing: fall back to a freshly named style
                 // instead of dereferencing null below.
                 if (pStyle == null) {
                     log("Unable to figure out a para style");
                     pStyle = p.makeStyle();
                     pStyle.setName("PPP" + ++newParaStyleNr);
                     styleCat.add(pStyle);
                 }

                 // Figured out a style to use.  Specify the style in this
                 // paragraph's attributes.
                 currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());

                 bodyNode.appendChild(currPara);
                 currParaStyle = pStyle;
             } else if (data[i].getClass() == WseTextRun.class) {
                 WseTextRun tr = (WseTextRun)data[i];
                 TextStyle trStyle = null;
                 Node trNodes[] = parseText(tr.getText(), doc);

                 // A text run that is not preceded by any paragraph
                 // element has nowhere to go; open an unstyled
                 // paragraph for it so that the text is not lost.
                 if (currPara == null) {
                     log("text run found before any paragraph");
                     currPara = doc.createElement(TAG_PARAGRAPH);
                     bodyNode.appendChild(currPara);
                 }

                 // First see if the formatting of this text run matches
                 // the default text formatting for this paragraph.  If
                 // it does, then just make the text node(s) children of
                 // the current paragraph.
                 if (currParaStyle != null) {
                     Style[] cps = new Style[1];
                     cps[0] = currParaStyle;
                     if (matchParaByText(cps, tr.makeStyle()) != null) {
                         for (int ii = 0; ii < trNodes.length; ii++) {
                             currPara.appendChild(trNodes[ii]);
                         }
                         continue;
                     }
                 }

                 // Check for existing, matching styles in the old style
                 // catalog.  If exactly one is found, use it.  Otherwise,
                 // check the new style catalog, and either use the style
                 // found or add this new one to it.
                 Style matches[] = oldStyleCat.getMatching(tr.makeStyle());
                 if (matches.length == 1)
                     trStyle = (TextStyle)matches[0];
                 else {
                     matches = styleCat.getMatching(tr.makeStyle());
                     if (matches.length == 0) {
                         trStyle = tr.makeStyle();
                         trStyle.setName("TTT" + ++newTextStyleNr);
                         styleCat.add(trStyle);
                     } else if (matches.length == 1)
                         trStyle = (TextStyle)matches[0];
                     else {
                         // All candidates match the formatting of this
                         // run, so the first one is as good as any.
                         log("multiple text style matches from new catalog");
                         trStyle = (TextStyle)matches[0];
                     }
                 }

                 // Create a text span node, set the style attribute, make the
                 // text node(s) its children, and append it to current paragraph's
                 // list of children.
                 Element textSpanNode = doc.createElement(TAG_SPAN);
                 textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName());
                 for (int ii = 0; ii < trNodes.length; ii++) {
                     textSpanNode.appendChild(trNodes[ii]);
                 }
                 currPara.appendChild(textSpanNode);
                 log("</SPAN>");
             }

             else if (data[i].getClass() == WseFontTable.class) {
                 fontTable = (WseFontTable)data[i];
             }

             else if (data[i].getClass() == WseColorTable.class) {
                 colorTable = (WseColorTable)data[i];
             }
         }


         //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT);
         NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT);
         Node rootNode = r.item(0);

         if (origDoc != null) {
             // read the original document
             java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
             origDoc.write(bos);
             SxwDocument origSxwDoc = new SxwDocument("old");
             origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray()));
             org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM();

             XmlUtil xu = new XmlUtil();

             // copy font declarations and the style sections from the
             // original document to the new document
             copyOriginalSection(xu, origDomDoc, TAG_OFFICE_FONT_DECLS,
                                 doc, rootNode, bodyNode);
             copyOriginalSection(xu, origDomDoc, TAG_OFFICE_STYLES,
                                 doc, rootNode, bodyNode);
             copyOriginalSection(xu, origDomDoc, TAG_OFFICE_AUTOMATIC_STYLES,
                                 doc, rootNode, bodyNode);
             copyOriginalSection(xu, origDomDoc, TAG_OFFICE_MASTER_STYLES,
                                 doc, rootNode, bodyNode);
         }

         // Original document not specified.  We need to add font declarations.
         // DJP: this might just be for debugging.  Merger will probably put
         // the "real" ones in.
         // DJP: if really doing it this way, do it right: gather font names
         // from style catalog(s).
         else {
             log("<FONT-DECLS/>");

             Node declNode = doc.createElement(TAG_OFFICE_FONT_DECLS);
             rootNode.insertBefore(declNode, bodyNode);

             String[] fontNames = { "Arial", "Arioso" };
             for (int f = 0; f < fontNames.length; f++) {
                 Element fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
                 fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, fontNames[f]);
                 fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, fontNames[f]);
                 fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
                 declNode.appendChild(fontNode);
             }
         }


         // Now add any new styles we have created in this document.
         NodeList nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
         Node autoStylesNode = nl.item(0);
         if (autoStylesNode == null) {
             autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
             log("<OFFICE-AUTOMATIC-STYLES/>");
             rootNode.insertBefore(autoStylesNode, bodyNode);
         }

         // Appending a child to another parent removes it from its old
         // parent, so keep taking the first child until none are left.
         Node newStyleCatNode = styleCat.writeNode(doc, "dummy");
         while (newStyleCatNode.hasChildNodes()) {
             autoStylesNode.appendChild(newStyleCatNode.getFirstChild());
         }

         oldStyleCat.dumpCSV(true);
         styleCat.dumpCSV(true);
         return sxwDoc;
     }


     /**
      *  Copies the first element with the given tag name from the
      *  original document into the new document, placing it in front
      *  of the body node.  Nothing is copied when the original
      *  document has no such element.
      *
      *  @param  xu          Utility used to clone the element.
      *  @param  origDomDoc  DOM of the original document.
      *  @param  tagName     Tag name of the section to copy.
      *  @param  doc         DOM of the new document.
      *  @param  rootNode    Node of the new document to insert into.
      *  @param  bodyNode    Child of <code>rootNode</code> to insert
      *                      in front of.
      */
     private void copyOriginalSection(XmlUtil xu,
                                      org.w3c.dom.Document origDomDoc,
                                      String tagName,
                                      org.w3c.dom.Document doc,
                                      Node rootNode, Node bodyNode) {
         Node section = origDomDoc.getElementsByTagName(tagName).item(0);
         if (section == null) {
             // NOTE(review): presumably a section missing from the
             // original content can simply be skipped - confirm.
             log("original document has no " + tagName);
             return;
         }
         DocumentFragment df = doc.createDocumentFragment();
         Node newNode = xu.deepClone(df, section);
         rootNode.insertBefore(newNode, bodyNode);
     }


     /**
      *  Writes a message to the debug log at trace level.
      *
      *  @param  str  Debug message.
      */
     private void log(String str) {

          Debug.log(Debug.TRACE, str);
     }


     /*
     public static void main(String args[]) {

      //   DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream());

         Node nodes[] = parseText("Tab here:\tThen some more text");
     }
 */
 }
	/**************************************************************
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*************************************************************/



	package org.openoffice.xmerge.converter.xml.sxw.wordsmith;

	import org.w3c.dom.*;

	import java.io.IOException;
	import java.util.Enumeration;

	import org.openoffice.xmerge.Document;
	import org.openoffice.xmerge.ConvertData;
	import org.openoffice.xmerge.ConvertException;
	import org.openoffice.xmerge.DocumentDeserializer;
	import org.openoffice.xmerge.converter.xml.OfficeConstants;
	import org.openoffice.xmerge.converter.palm.PalmDB;
	import org.openoffice.xmerge.converter.palm.Record;
	import org.openoffice.xmerge.converter.palm.PdbDecoder;
	import org.openoffice.xmerge.converter.palm.PalmDocument;
	import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;

	import java.util.Vector;
	import java.io.ByteArrayInputStream;

	import org.openoffice.xmerge.converter.xml.*;
	import org.openoffice.xmerge.util.Debug;
	import org.openoffice.xmerge.util.XmlUtil;

	/**
	* <p>WordSmith implementation of
	* org.openoffice.xmerge.DocumentDeserializer
	* for the {@link
	* org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
	* PluginFactoryImpl}.</p>
	*
	* The <code>deserialize</code> method uses a
	* <code>WSDecoder</code> to parse the WordSmith records into an
	* array of <code>Wse</code> elements, then it calls
	* <code>buildDocument</code> to create a <code>SxwDocument</code>
	* object from them.
	*
	* @author Herbie Ong, David Proulx
	*/
	public final class DocumentDeserializerImpl
	implements DOCConstants, OfficeConstants, DocumentDeserializer {

	/**
	* Decoder for the WordSmith format. A fresh instance is created
	* for every input document processed by <code>deserialize</code>.
	*/
	private WSDecoder decoder = null;

	/** Font table element of the WordSmith format. */
	WseFontTable fontTable = null;
	/** Color table element of the WordSmith format. */
	WseColorTable colorTable = null;
	/** Style catalog; assigned a new, empty catalog by <code>buildDocument</code>. */
	StyleCatalog styleCat = null;
	/**
	* Catalog that <code>readStyleCatalog</code> fills with the styles
	* of an existing document.
	*/
	StyleCatalog oldStyleCat = null;

	/**
	* The <code>ConvertData</code> handed to the constructor; used by
	* the no-argument <code>deserialize</code> method.
	*/
	private ConvertData cd = null;


	/**
	* Creates a deserializer bound to the given
	* <code>ConvertData</code>.
	*
	* @param cd The <code>ConvertData</code> that the no-argument
	* <code>deserialize</code> method reads its input documents from.
	*/
	public DocumentDeserializerImpl(ConvertData cd) {
	this.cd = cd;
	}


	/**
	* Deserializes the <code>ConvertData</code> supplied at
	* construction time, without any original document to take
	* style information from.
	*
	* @return The <code>Document</code> built from the input data.
	*
	* @throws ConvertException If any conversion error occurs.
	* @throws IOException If any I/O error occurs.
	*/
	public Document deserialize() throws ConvertException,
	    IOException {
	    // No original document is available on this code path.
	    final Document noOriginal = null;
	    return deserialize(noOriginal, cd);
	}


	/**
	* Decodes every <code>PalmDocument</code> found in the given
	* <code>ConvertData</code> and builds an office document from it.
	* Each input document replaces the previous result, so only the
	* document built from the last one is returned.
	*
	* @param origDoc Original document passed on to
	* <code>buildDocument</code>; may be <code>null</code>.
	* @param cd The data to deserialize.
	*
	* @return The document built from the last input document, or
	* <code>null</code> if there was no input document.
	*
	* @throws IOException If any I/O error occurs.
	*/
	public Document deserialize(Document origDoc, ConvertData cd)
	throws IOException {

	    Document result = null;

	    for (Enumeration docs = cd.getDocumentEnumeration();
	         docs.hasMoreElements(); ) {
	        PalmDocument current = (PalmDocument) docs.nextElement();
	        Record[] records = current.getPdb().getRecords();

	        // A new decoder is used for each input document.
	        decoder = new WSDecoder();
	        Wse[] elements = decoder.parseDocument(records);

	        result = buildDocument(current.getName(), elements, origDoc);
	    }
	    return result;
	}


	/**
	* Fills <code>oldStyleCat</code> with the styles found in an
	* existing document so they can be reused as a starting point.
	* The document is written to a byte buffer and read back as an
	* <code>SxwDocument</code>; the first styles, automatic-styles
	* and master-styles elements of its content DOM are then added
	* to the catalog. Any exception is logged and otherwise ignored.
	*
	* @param parentDoc The document to read the styles from.
	*/
	private void readStyleCatalog(Document parentDoc) {
	    try {
	        java.io.ByteArrayOutputStream buffer =
	            new java.io.ByteArrayOutputStream();
	        parentDoc.write(buffer);
	        SxwDocument oldDoc = new SxwDocument("old");
	        oldDoc.read(new ByteArrayInputStream(buffer.toByteArray()));
	        org.w3c.dom.Document dom = oldDoc.getContentDOM();

	        // Paragraph-family styles are registered twice: once as
	        // ParaStyle and once as TextStyle.
	        String[] families = { "text", "paragraph", "paragraph" };
	        Class[] classes = {
	            TextStyle.class, ParaStyle.class, TextStyle.class
	        };

	        String[] sections = {
	            TAG_OFFICE_STYLES,
	            TAG_OFFICE_AUTOMATIC_STYLES,
	            TAG_OFFICE_MASTER_STYLES
	        };
	        for (int s = 0; s < sections.length; s++) {
	            NodeList found = dom.getElementsByTagName(sections[s]);
	            oldStyleCat.add(found.item(0), families, classes, null, false);
	        }

	    } catch (Exception e) {
	        Debug.log(Debug.ERROR, "", e);
	    }

	}


	/**
	* Looks for the single paragraph style in <code>paraStyles</code>
	* whose name equals the name of a paragraph-family style in the
	* old style catalog that matches the text formatting of
	* <code>tStyle</code>.
	*
	* @param paraStyles Candidate paragraph <code>Style</code> objects.
	* @param tStyle Text <code>Style</code> to match.
	*
	* @return The matching paragraph <code>Style</code>, or
	* <code>null</code> if there is no match or more than one match.
	*/
	private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) {
	    Style[] candidates = oldStyleCat.getMatching(tStyle);
	    int hits = 0;
	    int lastHit = -1;

	    for (int c = 0; c < candidates.length; c++) {
	        TextStyle candidate = (TextStyle) candidates[c];

	        // Only paragraph-family entries can name a paragraph style.
	        if (candidate.getFamily().equals("paragraph")) {
	            for (int p = 0; p < paraStyles.length; p++) {
	                if (candidate.getName().equals(paraStyles[p].getName())) {
	                    hits++;
	                    lastHit = p;
	                }
	            }
	        }
	    }

	    return (hits == 1) ? (ParaStyle) paraStyles[lastHit] : null;
	}


	/**
	* Take a <code>String</code> of text and turn it into a sequence
	* of <code>Node</code> objects.
	*
	* @param text <code>String</code> of text.
	* @param parentDoc Parent <code>Document</code>.
	*
	* @return Array of <code>Node</code> objects.
	*/
	private Node[] parseText(String text, org.w3c.dom.Document parentDoc) {
	Vector nodeVec = new Vector();

	// Break up the text from the WordSmith text run into Open
	// Office text runs. There may be more runs in OO because
	// runs of 2 or more spaces map to nodes.
	while ((text.indexOf(" ") != -1) \|\| (text.indexOf("\t") != 1)) {

	// Find the indices of tabs and multiple spaces, and
	// figure out which of them occurs first in the string.
	int spaceIndex = text.indexOf(" ");
	int tabIndex = text.indexOf("\t");
	if ((spaceIndex == -1) && (tabIndex == -1))
	break; // DJP This should not be necessary. What is wrong
	// with the while() stmt up above?
	int closerIndex; // Index of the first of these
	if (spaceIndex == -1)
	closerIndex = tabIndex;
	else if (tabIndex == -1)
	closerIndex = spaceIndex;
	else
	closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex;

	// If there is any text prior to the first occurrence of a
	// tab or spaces, create a text node from it, then chop it
	// off the string we're working with.
	if (closerIndex > 0) {
	String beginningText = text.substring(0, closerIndex);
	Text textNode = parentDoc.createTextNode(beginningText);
	nodeVec.addElement(textNode);
	log("<TEXT>");
	log(beginningText);
	log("</TEXT>");
	}
	text = text.substring(closerIndex);

	// Handle either tab character or space sequence by creating
	// an element for it, and then chopping out the text that
	// represented it in "text".
	if (closerIndex == tabIndex) {
	Element tabNode = parentDoc.createElement(TAG_TAB_STOP);
	nodeVec.add(tabNode);
	text = text.substring(1); // tab is always a single character
	log("<TAB/>");
	} else {
	// Compute length of space sequence.
	int nrSpaces = 2;
	while ((nrSpaces < text.length())
	&& text.substring(nrSpaces, nrSpaces + 1).equals(" "))
	nrSpaces++;

	Element spaceNode = parentDoc.createElement(TAG_SPACE);
	spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, new Integer(nrSpaces).toString());
	nodeVec.add(spaceNode);
	text = text.substring(nrSpaces);
	log("<SPACE count=\"" + nrSpaces + "\" />");
	}
	}

	// No more tabs or space sequences. If there's any remaining
	// text create a text node for it.
	if (text.length() > 0) {
	Text textNode = parentDoc.createTextNode(text);
	nodeVec.add(textNode);
	log("<TEXT>");
	log(text);
	log("</TEXT>");
	}

	// Now create and populate an array to return the nodes in.
	Node nodes[] = new Node[nodeVec.size()];
	for (int i = 0; i < nodeVec.size(); i++)
	nodes[i] = (Node)nodeVec.elementAt(i);
	return nodes;
	}


	/**
	 *  Builds an <code>SxwDocument</code> from a sequence of WordSmith
	 *  elements.
	 *
	 *  <p>Each <code>WsePara</code> element opens a new paragraph under
	 *  the <code>office:body</code> node, and each <code>WseTextRun</code>
	 *  element is added to the most recently opened paragraph, wrapped
	 *  in a span when its formatting differs from the paragraph's
	 *  default text formatting.  Styles are looked up first among the
	 *  styles read from <code>origDoc</code> and then among the styles
	 *  created during this conversion; a style that cannot be found in
	 *  either is created and added to the new-style catalog.</p>
	 *
	 *  <p>When a style cannot be resolved unambiguously, the affected
	 *  paragraph or text run is written without a style reference (and
	 *  the condition is logged) instead of aborting the conversion.</p>
	 *
	 *  @param  docName  Name to give the new document.
	 *  @param  data     WordSmith elements to convert, in document order.
	 *  @param  origDoc  Original document whose styles and font
	 *                   declarations are reused, or <code>null</code>
	 *                   if there is none.
	 *
	 *  @return  Resulting <code>SxwDocument</code> object.
	 *
	 *  @throws  IOException  If any I/O error occurs.
	 */
	private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc)
	throws IOException {

	    // create minimum office xml document.
	    SxwDocument sxwDoc = new SxwDocument(docName);
	    sxwDoc.initContentDOM();

	    org.w3c.dom.Document doc = sxwDoc.getContentDOM();

	    // Grab hold of the office:body tag,
	    // Assume there should be one.
	    // This is where top level paragraphs will append to.
	    NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
	    Node bodyNode = list.item(0);

	    styleCat = new StyleCatalog(50);
	    oldStyleCat = new StyleCatalog(50);
	    if (origDoc != null)
	        readStyleCatalog(origDoc);

	    Element currPara = null;
	    ParaStyle currParaStyle = null;
	    int newTextStyleNr = 0;
	    int newParaStyleNr = 0;

	    // Now write out the document body by running through
	    // the list of WordSmith elements and processing each one
	    // in turn.
	    for (int i = 0; i < data.length; i++) {

	        if (data[i].getClass() == WsePara.class) {

	            currPara = doc.createElement(TAG_PARAGRAPH);
	            log("</PARA>");
	            log("<PARA>");

	            WsePara p = (WsePara)data[i];

	            // Save info about the first text run, if there is one.
	            WseTextRun firstTextRun = null;

	            if ((data.length >= i + 2)
	                && (data[i+1].getClass() == WseTextRun.class))
	                firstTextRun = (WseTextRun)data[i+1];

	            Style matches[] = oldStyleCat.getMatching(p.makeStyle());

	            // See if we can find a unique match in the catalog
	            // of existing styles from the original document.
	            ParaStyle pStyle = null;
	            if (matches.length == 1) {
	                pStyle = (ParaStyle)matches[0];
	                log("using an existing style");
	            } else if ((matches.length > 1) && (firstTextRun != null)) {
	                pStyle = matchParaByText(matches, firstTextRun.makeStyle());
	                log("resolved a para by looking @ text");
	            }

	            // If nothing found so far, try looking in the catalog
	            // of newly-created styles.
	            // DJP FIXME: if we need to add two para styles with the
	            // same para formatting info but different default text
	            // styles, this won't work!
	            if (pStyle == null) {
	                log("had " + matches.length + " matches in old catalog");
	                matches = styleCat.getMatching(p.makeStyle());
	                if (matches.length == 0) {
	                    pStyle = p.makeStyle();
	                    String newName = "PPP" + ++newParaStyleNr;
	                    pStyle.setName(newName);
	                    styleCat.add(pStyle);
	                    // DJP: write in the text format info here
	                    log("created a new style");
	                } else if (matches.length == 1) {
	                    pStyle = (ParaStyle)matches[0];
	                    log("re-using a new style");
	                } else if (firstTextRun != null) {
	                    pStyle = matchParaByText(matches, firstTextRun.makeStyle());
	                    if (pStyle != null) {
	                        log("resolved a (new) para by looking @ text");
	                    } else
	                        log("Hey this shouldn't happen! - nr of matches is "
	                            + matches.length);
	                }
	            }

	            if (pStyle == null) {
	                // No style could be chosen (several candidates and
	                // nothing to disambiguate them).  Leave the paragraph
	                // without a style reference rather than dereferencing
	                // null.
	                log("Unable to figure out a para style");
	            } else {
	                // Figured out a style to use.  Specify the style in
	                // this paragraph's attributes.
	                currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
	            }

	            bodyNode.appendChild(currPara);
	            currParaStyle = pStyle;
	        } else if (data[i].getClass() == WseTextRun.class) {
	            WseTextRun tr = (WseTextRun)data[i];
	            TextStyle trStyle = null;
	            Node trNodes[] = parseText(tr.getText(), doc);

	            // A text run that shows up before any paragraph element
	            // has nowhere to go; open an unstyled paragraph for it.
	            if (currPara == null) {
	                log("text run found outside of a paragraph");
	                currPara = doc.createElement(TAG_PARAGRAPH);
	                bodyNode.appendChild(currPara);
	            }

	            // First see if the formatting of this text run matches
	            // the default text formatting for this paragraph.  If
	            // it does, then just make the text node(s) children of
	            // the current paragraph.  Skipped when the paragraph has
	            // no resolved style to compare against.
	            if (currParaStyle != null) {
	                Style[] cps = new Style[1];
	                cps[0] = currParaStyle;
	                if (matchParaByText(cps, tr.makeStyle()) != null) {
	                    for (int ii = 0; ii < trNodes.length; ii++) {
	                        currPara.appendChild(trNodes[ii]);
	                    }
	                    continue;
	                }
	            }

	            // Check for existing, matching styles in the old style
	            // catalog.  If exactly one is found, use it.  Otherwise,
	            // check the new style catalog, and either use the style
	            // found or add this new one to it.
	            Style matches[] = oldStyleCat.getMatching(tr.makeStyle());
	            if (matches.length == 1)
	                trStyle = (TextStyle)matches[0];
	            else {
	                matches = styleCat.getMatching(tr.makeStyle());
	                if (matches.length == 0) {
	                    trStyle = tr.makeStyle();
	                    String newName = "TTT" + ++newTextStyleNr;
	                    trStyle.setName(newName);
	                    styleCat.add(trStyle);
	                } else if (matches.length == 1)
	                    trStyle = (TextStyle)matches[0];
	                else
	                    log("multiple text style matches from new catalog");
	            }

	            // No single style could be chosen.  Keep the text, but
	            // attach it to the paragraph directly instead of building
	            // a span around a null style.
	            if (trStyle == null) {
	                for (int ii = 0; ii < trNodes.length; ii++) {
	                    currPara.appendChild(trNodes[ii]);
	                }
	                continue;
	            }

	            // Create a text span node, set the style attribute, make the
	            // text node(s) its children, and append it to current paragraph's
	            // list of children.
	            Element textSpanNode = doc.createElement(TAG_SPAN);
	            textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName());
	            for (int ii = 0; ii < trNodes.length; ii++) {
	                textSpanNode.appendChild(trNodes[ii]);
	            }
	            currPara.appendChild(textSpanNode);
	            log("</SPAN>");
	        }

	        else if (data[i].getClass() == WseFontTable.class) {
	            fontTable = (WseFontTable)data[i];
	        }

	        else if (data[i].getClass() == WseColorTable.class) {
	            colorTable = (WseColorTable)data[i];
	        }
	    }


	    //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT);
	    NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT);
	    Node rootNode = r.item(0);

	    if (origDoc != null) {
	        // read the original document
	        java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
	        origDoc.write(bos);
	        SxwDocument origSxwDoc = new SxwDocument("old");
	        origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray()));
	        org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM();

	        XmlUtil xu = new XmlUtil();

	        // copy font declarations from original document to the new document
	        copyOriginalSection(xu, origDomDoc, TAG_OFFICE_FONT_DECLS,
	                            doc, rootNode, bodyNode);

	        // copy style catalog from original document to the new document
	        copyOriginalSection(xu, origDomDoc, TAG_OFFICE_STYLES,
	                            doc, rootNode, bodyNode);
	        copyOriginalSection(xu, origDomDoc, TAG_OFFICE_AUTOMATIC_STYLES,
	                            doc, rootNode, bodyNode);
	        copyOriginalSection(xu, origDomDoc, TAG_OFFICE_MASTER_STYLES,
	                            doc, rootNode, bodyNode);
	    }

	    // Original document not specified.  We need to add font declarations.
	    // DJP: this might just be for debugging.  Merger will probably put
	    // the "real" ones in.
	    // DJP: if really doing it this way, do it right: gather font names
	    // from style catalog(s).
	    else {
	        org.w3c.dom.Node declNode;

	        log("<FONT-DECLS/>");

	        declNode = doc.createElement(TAG_OFFICE_FONT_DECLS);
	        rootNode.insertBefore(declNode, bodyNode);
	        org.w3c.dom.Element fontNode;

	        fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
	        fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial");
	        fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial");
	        fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
	        declNode.appendChild(fontNode);

	        fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
	        fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso");
	        fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso");
	        fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
	        declNode.appendChild(fontNode);
	    }


	    // Now add any new styles we have created in this document.
	    NodeList nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
	    Node autoStylesNode = nl.item(0);
	    if (autoStylesNode == null) {
	        autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
	        log("<OFFICE-AUTOMATIC-STYLES/>");
	        rootNode.insertBefore(autoStylesNode, bodyNode);
	    }

	    Node newStyleCatNode = styleCat.writeNode(doc, "dummy");
	    nl = newStyleCatNode.getChildNodes();
	    int nNodes = nl.getLength();
	    for (int i = 0; i < nNodes; i++) {
	        // appendChild() moves the node out of newStyleCatNode, and the
	        // child list is live, so item(0) is always the next one to move.
	        autoStylesNode.appendChild(nl.item(0));
	    }

	    oldStyleCat.dumpCSV(true);
	    styleCat.dumpCSV(true);
	    return sxwDoc;
	}


	/**
	 *  Copies the first element named <code>tagName</code> from the
	 *  original document into the new document, inserting the copy
	 *  under <code>rootNode</code> just before <code>bodyNode</code>.
	 *  Does nothing, apart from logging, if the original document has
	 *  no such element.
	 *
	 *  @param  xu          Utility used to clone the element.
	 *  @param  origDomDoc  DOM of the original document.
	 *  @param  tagName     Tag name of the element to copy.
	 *  @param  doc         DOM of the new document.
	 *  @param  rootNode    Node in <code>doc</code> that receives the copy.
	 *  @param  bodyNode    Child of <code>rootNode</code> that the copy is
	 *                      inserted before.
	 */
	private void copyOriginalSection(XmlUtil xu, org.w3c.dom.Document origDomDoc,
	                                 String tagName, org.w3c.dom.Document doc,
	                                 Node rootNode, Node bodyNode) {

	    Node section = origDomDoc.getElementsByTagName(tagName).item(0);
	    if (section == null) {
	        log("original document has no " + tagName + " element");
	        return;
	    }

	    // The fragment is created from the new document and handed to
	    // deepClone() together with the node to copy, as the inline code
	    // this helper replaces did.
	    org.w3c.dom.DocumentFragment df = doc.createDocumentFragment();
	    Node newNode = xu.deepClone(df, section);
	    rootNode.insertBefore(newNode, bodyNode);
	}


	/**
	 *  Forwards a message to <code>Debug.log</code> at the
	 *  <code>Debug.TRACE</code> level.
	 *
	 *  @param  str  Text of the message to record.
	 */
	private void log(String str) {
	    Debug.log(Debug.TRACE, str);
	}


	/*
	public static void main(String args[]) {

	// DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream());

	Node nodes[] = parseText("Tab here:\tThen some more text");
	}
	*/
	}