AOO410/main/xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;

 import org.w3c.dom.NodeList;
 import org.w3c.dom.Node;
 import org.w3c.dom.Element;
 import org.w3c.dom.Text;

 import java.io.IOException;
 import java.util.Enumeration;

 import org.openoffice.xmerge.Document;
 import org.openoffice.xmerge.ConvertData;
 import org.openoffice.xmerge.ConvertException;
 import org.openoffice.xmerge.DocumentDeserializer;
 import org.openoffice.xmerge.converter.xml.OfficeConstants;
 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
 import org.openoffice.xmerge.converter.palm.PalmDB;
 import org.openoffice.xmerge.converter.palm.Record;
 import org.openoffice.xmerge.converter.palm.PalmDocument;
 import org.openoffice.xmerge.util.Debug;

 /**
  *  <p>AportisDoc implementation of <code>DocumentDeserializer</code>
  *  for the {@link
  *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
  *  PluginFactoryImpl}.</p>
  *
  *  <p>This converts a file in AportisDoc PDB format to StarOffice
  *  XML format.</p>
  *
  *  <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
  *  to read the AportisDoc format into a <code>String</code> object, then
  *  it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
  *  object from it.</p>
  *
  *  @author      Herbie Ong
  */
 public final class DocumentDeserializerImpl
     implements OfficeConstants, DocConstants, DocumentDeserializer {

     /**  The <code>ConvertData</code> holding the input to be converted. */
     private final ConvertData cd;


     /**
      *  Constructor that assigns the given <code>ConvertData</code>
      *  to this object as input.
      *
      *  @param  cd  A <code>ConvertData</code> object to read data for
      *              the conversion process by the <code>deserialize</code>
      *              method.
      */
     public DocumentDeserializerImpl(ConvertData cd) {
         this.cd = cd;
     }


     /**
      *  Convert the documents held by the <code>ConvertData</code> object
      *  into a <code>SxwDocument</code> object.
      *
      *  <p>Every <code>PalmDocument</code> in the enumeration is decoded
      *  and converted, but only the result for the last one is
      *  returned.</p>
      *
      *  @return  Resulting <code>SxwDocument</code> object, or
      *           <code>null</code> if the enumeration is empty.
      *
      *  @throws  ConvertException   If any conversion error occurs.
      *  @throws  IOException        If any I/O error occurs.
      */
     public Document deserialize() throws IOException, ConvertException {

         Document doc = null;

         // Raw Enumeration is kept on purpose: this file predates generics.
         Enumeration e = cd.getDocumentEnumeration();
         while (e.hasMoreElements()) {
             PalmDocument palmDoc = (PalmDocument) e.nextElement();
             PalmDB pdb = palmDoc.getPdb();

             log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
             log("<AportisDoc>");

             Record[] recs = pdb.getRecords();
             String docName = palmDoc.getName();
             DocDecoder decoder = new DocDecoder();
             String text = decoder.parseRecords(recs);
             doc = buildDocument(docName, text);

             log("</AportisDoc>");
         }

         return doc;
     }


     /**
      *  Parses the text content of an AportisDoc format and builds a
      *  <code>SxwDocument</code>.
      *
      *  <p>The text is scanned character by character.  Tabs become tab
      *  elements, end-of-line characters close the current paragraph,
      *  runs of two or more spaces become a space element, and everything
      *  else is collected into text nodes.</p>
      *
      *  @param  docName  Name of <code>Document</code>.
      *  @param  str      Text content of AportisDoc format.
      *
      *  @return  Resulting <code>SxwDocument</code> object.
      *
      *  @throws  IOException  If any I/O error occurs, or if the content
      *                        DOM has no body element to append to.
      */
     private SxwDocument buildDocument(String docName, String str)
         throws IOException {

         // create minimum office xml document.
         SxwDocument sxwDoc = new SxwDocument(docName);
         sxwDoc.initContentDOM();

         org.w3c.dom.Document doc = sxwDoc.getContentDOM();

         // Grab hold of the office:body tag.
         // This is where top level paragraphs will append to.
         NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
         Node bodyNode = list.item(0);
         if (bodyNode == null) {
             // item(0) returns null when the list is empty; fail with the
             // declared exception instead of a later NullPointerException.
             throw new IOException(
                 "Content DOM has no " + TAG_OFFICE_BODY + " element");
         }

         // Store all the text in a character array.
         char[] text = str.toCharArray();

         // startIndex has 2 purposes:
         // if value is -1, it means that there are no text characters
         // needed to be processed for a Text node.  if value >= 0, it
         // is the index of the starting position of a text section
         // for a Text node.
         int startIndex = -1;

         // Create a paragraph node to start with.
         Element paraNode = doc.createElement(TAG_PARAGRAPH);

         log("<PARA>");

         for (int i = 0; i < text.length; i++) {

             switch (text[i]) {

                 case TAB_CHAR: {

                     // Flush any pending text before the tab.
                     if (startIndex >= 0) {
                         addTextNode(doc, paraNode, text, startIndex, i - 1);
                         startIndex = -1;
                     }

                     // Then, add tab element.
                     Element tabNode = doc.createElement(TAG_TAB_STOP);
                     paraNode.appendChild(tabNode);

                     log("<TAB/>");
                     break;
                 }

                 case EOL_CHAR: {

                     // Flush any pending text before closing the paragraph.
                     if (startIndex >= 0) {
                         addTextNode(doc, paraNode, text, startIndex, i - 1);
                         startIndex = -1;
                     }

                     // Then, add the current paragraph to body.
                     bodyNode.appendChild(paraNode);

                     // Create another paragraph element.
                     paraNode = doc.createElement(TAG_PARAGRAPH);

                     log("</PARA>");
                     log("<PARA>");
                     break;
                 }

                 case SPACE_CHAR: {

                     // Number of space chars directly following index i.
                     int count = countFollowingSpaces(text, i);

                     // A space node is needed only for a run of two or
                     // more spaces, i.e. when at least one space follows.
                     if (count > 0) {

                         // Spaces the space element has to stand for.
                         int spaces = count;

                         if (startIndex >= 0) {
                             // The pending text node absorbs the first
                             // space of the run (index i).
                             addTextNode(doc, paraNode, text,
                                         startIndex, i);
                             startIndex = -1;
                         } else {
                             // No text node to carry the first space, so
                             // the element must account for it as well;
                             // otherwise one space would be lost.
                             spaces = count + 1;
                         }

                         // Then, create a space element
                         // with the proper attribute.
                         Element spaceNode = doc.createElement(TAG_SPACE);
                         spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
                             Integer.toString(spaces));

                         paraNode.appendChild(spaceNode);

                         // reposition i to the last space character.
                         i += count;

                         log("<SPACE count=\"" + spaces + "\" />");

                     } else if (startIndex < 0) {

                         // A single space is ordinary text; it starts a
                         // text section if none is open yet.
                         startIndex = i;
                         log("<TEXT>");
                     }

                     break;
                 }

                 default:

                     // If there are no chars for text node yet,
                     // this should be the start.
                     if (startIndex < 0) {

                         startIndex = i;
                         log("<TEXT>");
                     }

                     break;
             }
         }

         int lastIndex = text.length - 1;

         // Flush any text still pending at the end of input.
         if (startIndex >= 0) {
             addTextNode(doc, paraNode, text, startIndex, lastIndex);
         }

         // Then, add the last paragraph element if it is not added yet.
         // Empty input has no last character to inspect; its single empty
         // paragraph is appended as well.
         if (text.length == 0 || text[lastIndex] != EOL_CHAR) {
             bodyNode.appendChild(paraNode);
         }

         log("</PARA>");

         return sxwDoc;
     }


     /**
      *  Counts the consecutive space characters that directly follow the
      *  given position, never reading past the end of the array.
      *
      *  @param  text  Array of characters containing text.
      *  @param  pos   Index of the space character the run starts at.
      *
      *  @return  Number of space characters after <code>pos</code>.
      */
     private static int countFollowingSpaces(char[] text, int pos) {

         int count = 0;
         int next = pos + 1;
         while (next < text.length && text[next] == SPACE_CHAR) {
             count++;
             next++;
         }
         return count;
     }


     /**
      *  Add a Text <code>Node</code> to the given paragraph node with the
      *  text starting at the given <code>startPos</code> until
      *  <code>endPos</code>, both inclusive.
      *
      *  @param  doc       <code>org.w3c.dom.Document</code> object for creating
      *                    <code>Node</code> objects.
      *  @param  para      The current paragraph <code>Node</code> to append
      *                    text <code>Node</code>.
      *  @param  text      Array of characters containing text.
      *  @param  startPos  Starting index position for text value.
      *  @param  endPos    End index position for text value.
      */
     private void addTextNode(org.w3c.dom.Document doc, Node para, char text[],
         int startPos, int endPos) {

         // endPos is inclusive, hence the + 1 for the length.
         String str = new String(text, startPos, endPos - startPos + 1);
         Text textNode = doc.createTextNode(str);
         para.appendChild(textNode);
         log(str);
         log("</TEXT>");
     }

     /**
      *  Sends message to the log object at trace level.
      *
      *  @param  str  Debug message.
      */
     private void log(String str) {

         Debug.log(Debug.TRACE, str);
     }
 }
	/**************************************************************
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*************************************************************/



	package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;

	import org.w3c.dom.NodeList;
	import org.w3c.dom.Node;
	import org.w3c.dom.Element;
	import org.w3c.dom.Text;

	import java.io.IOException;
	import java.util.Enumeration;

	import org.openoffice.xmerge.Document;
	import org.openoffice.xmerge.ConvertData;
	import org.openoffice.xmerge.ConvertException;
	import org.openoffice.xmerge.DocumentDeserializer;
	import org.openoffice.xmerge.converter.xml.OfficeConstants;
	import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
	import org.openoffice.xmerge.converter.palm.PalmDB;
	import org.openoffice.xmerge.converter.palm.Record;
	import org.openoffice.xmerge.converter.palm.PalmDocument;
	import org.openoffice.xmerge.util.Debug;

	/**
	* <p>AportisDoc implementation of <code>DocumentDeserializer</code>
	* for the {@link
	* org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
	* PluginFactoryImpl}.</p>
	*
	* <p>This converts a file in AportisDoc PDB format to StarOffice
	* XML format.</p>
	*
	* <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
	* to read the AportisDoc format into a <code>String</code> object, then
	* it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
	* object from it.</p>
	*
	* @author Herbie Ong
	*/
	public final class DocumentDeserializerImpl
	    implements OfficeConstants, DocConstants, DocumentDeserializer {

	    /** The <code>ConvertData</code> holding the input to be converted. */
	    private final ConvertData cd;


	    /**
	     * Constructor that assigns the given <code>ConvertData</code>
	     * to this object as input.
	     *
	     * @param cd A <code>ConvertData</code> object to read data for
	     * the conversion process by the <code>deserialize</code>
	     * method.
	     */
	    public DocumentDeserializerImpl(ConvertData cd) {
	        this.cd = cd;
	    }


	    /**
	     * Convert the documents held by the <code>ConvertData</code> object
	     * into a <code>SxwDocument</code> object.
	     *
	     * <p>Every <code>PalmDocument</code> in the enumeration is decoded
	     * and converted, but only the result for the last one is
	     * returned.</p>
	     *
	     * @return Resulting <code>SxwDocument</code> object, or
	     * <code>null</code> if the enumeration is empty.
	     *
	     * @throws ConvertException If any conversion error occurs.
	     * @throws IOException If any I/O error occurs.
	     */
	    public Document deserialize() throws IOException, ConvertException {

	        Document doc = null;

	        // Raw Enumeration is kept on purpose: this file predates generics.
	        Enumeration e = cd.getDocumentEnumeration();
	        while (e.hasMoreElements()) {
	            PalmDocument palmDoc = (PalmDocument) e.nextElement();
	            PalmDB pdb = palmDoc.getPdb();

	            log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
	            log("<AportisDoc>");

	            Record[] recs = pdb.getRecords();
	            String docName = palmDoc.getName();
	            DocDecoder decoder = new DocDecoder();
	            String text = decoder.parseRecords(recs);
	            doc = buildDocument(docName, text);

	            log("</AportisDoc>");
	        }

	        return doc;
	    }


	    /**
	     * Parses the text content of an AportisDoc format and builds a
	     * <code>SxwDocument</code>.
	     *
	     * <p>The text is scanned character by character. Tabs become tab
	     * elements, end-of-line characters close the current paragraph,
	     * runs of two or more spaces become a space element, and everything
	     * else is collected into text nodes.</p>
	     *
	     * @param docName Name of <code>Document</code>.
	     * @param str Text content of AportisDoc format.
	     *
	     * @return Resulting <code>SxwDocument</code> object.
	     *
	     * @throws IOException If any I/O error occurs, or if the content
	     * DOM has no body element to append to.
	     */
	    private SxwDocument buildDocument(String docName, String str)
	        throws IOException {

	        // create minimum office xml document.
	        SxwDocument sxwDoc = new SxwDocument(docName);
	        sxwDoc.initContentDOM();

	        org.w3c.dom.Document doc = sxwDoc.getContentDOM();

	        // Grab hold of the office:body tag.
	        // This is where top level paragraphs will append to.
	        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
	        Node bodyNode = list.item(0);
	        if (bodyNode == null) {
	            // item(0) returns null when the list is empty; fail with the
	            // declared exception instead of a later NullPointerException.
	            throw new IOException(
	                "Content DOM has no " + TAG_OFFICE_BODY + " element");
	        }

	        // Store all the text in a character array.
	        char[] text = str.toCharArray();

	        // startIndex has 2 purposes:
	        // if value is -1, it means that there are no text characters
	        // needed to be processed for a Text node. if value >= 0, it
	        // is the index of the starting position of a text section
	        // for a Text node.
	        int startIndex = -1;

	        // Create a paragraph node to start with.
	        Element paraNode = doc.createElement(TAG_PARAGRAPH);

	        log("<PARA>");

	        for (int i = 0; i < text.length; i++) {

	            switch (text[i]) {

	                case TAB_CHAR: {

	                    // Flush any pending text before the tab.
	                    if (startIndex >= 0) {
	                        addTextNode(doc, paraNode, text, startIndex, i - 1);
	                        startIndex = -1;
	                    }

	                    // Then, add tab element.
	                    Element tabNode = doc.createElement(TAG_TAB_STOP);
	                    paraNode.appendChild(tabNode);

	                    log("<TAB/>");
	                    break;
	                }

	                case EOL_CHAR: {

	                    // Flush any pending text before closing the paragraph.
	                    if (startIndex >= 0) {
	                        addTextNode(doc, paraNode, text, startIndex, i - 1);
	                        startIndex = -1;
	                    }

	                    // Then, add the current paragraph to body.
	                    bodyNode.appendChild(paraNode);

	                    // Create another paragraph element.
	                    paraNode = doc.createElement(TAG_PARAGRAPH);

	                    log("</PARA>");
	                    log("<PARA>");
	                    break;
	                }

	                case SPACE_CHAR: {

	                    // Number of space chars directly following index i.
	                    int count = countFollowingSpaces(text, i);

	                    // A space node is needed only for a run of two or
	                    // more spaces, i.e. when at least one space follows.
	                    if (count > 0) {

	                        // Spaces the space element has to stand for.
	                        int spaces = count;

	                        if (startIndex >= 0) {
	                            // The pending text node absorbs the first
	                            // space of the run (index i).
	                            addTextNode(doc, paraNode, text,
	                                        startIndex, i);
	                            startIndex = -1;
	                        } else {
	                            // No text node to carry the first space, so
	                            // the element must account for it as well;
	                            // otherwise one space would be lost.
	                            spaces = count + 1;
	                        }

	                        // Then, create a space element
	                        // with the proper attribute.
	                        Element spaceNode = doc.createElement(TAG_SPACE);
	                        spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
	                            Integer.toString(spaces));

	                        paraNode.appendChild(spaceNode);

	                        // reposition i to the last space character.
	                        i += count;

	                        log("<SPACE count=\"" + spaces + "\" />");

	                    } else if (startIndex < 0) {

	                        // A single space is ordinary text; it starts a
	                        // text section if none is open yet.
	                        startIndex = i;
	                        log("<TEXT>");
	                    }

	                    break;
	                }

	                default:

	                    // If there are no chars for text node yet,
	                    // this should be the start.
	                    if (startIndex < 0) {

	                        startIndex = i;
	                        log("<TEXT>");
	                    }

	                    break;
	            }
	        }

	        int lastIndex = text.length - 1;

	        // Flush any text still pending at the end of input.
	        if (startIndex >= 0) {
	            addTextNode(doc, paraNode, text, startIndex, lastIndex);
	        }

	        // Then, add the last paragraph element if it is not added yet.
	        // Empty input has no last character to inspect; its single empty
	        // paragraph is appended as well.
	        if (text.length == 0 || text[lastIndex] != EOL_CHAR) {
	            bodyNode.appendChild(paraNode);
	        }

	        log("</PARA>");

	        return sxwDoc;
	    }


	    /**
	     * Counts the consecutive space characters that directly follow the
	     * given position, never reading past the end of the array.
	     *
	     * @param text Array of characters containing text.
	     * @param pos Index of the space character the run starts at.
	     *
	     * @return Number of space characters after <code>pos</code>.
	     */
	    private static int countFollowingSpaces(char[] text, int pos) {

	        int count = 0;
	        int next = pos + 1;
	        while (next < text.length && text[next] == SPACE_CHAR) {
	            count++;
	            next++;
	        }
	        return count;
	    }


	    /**
	     * Add a Text <code>Node</code> to the given paragraph node with the
	     * text starting at the given <code>startPos</code> until
	     * <code>endPos</code>, both inclusive.
	     *
	     * @param doc <code>org.w3c.dom.Document</code> object for creating
	     * <code>Node</code> objects.
	     * @param para The current paragraph <code>Node</code> to append
	     * text <code>Node</code>.
	     * @param text Array of characters containing text.
	     * @param startPos Starting index position for text value.
	     * @param endPos End index position for text value.
	     */
	    private void addTextNode(org.w3c.dom.Document doc, Node para, char text[],
	        int startPos, int endPos) {

	        // endPos is inclusive, hence the + 1 for the length.
	        String str = new String(text, startPos, endPos - startPos + 1);
	        Text textNode = doc.createTextNode(str);
	        para.appendChild(textNode);
	        log(str);
	        log("</TEXT>");
	    }

	    /**
	     * Sends message to the log object at trace level.
	     *
	     * @param str Debug message.
	     */
	    private void log(String str) {

	        Debug.log(Debug.TRACE, str);
	    }
	}