blob: c5243bbaa75441cdcc43b78a61f25faba0692de2 [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
import java.io.IOException;
import java.util.Enumeration;
import org.openoffice.xmerge.Document;
import org.openoffice.xmerge.ConvertData;
import org.openoffice.xmerge.ConvertException;
import org.openoffice.xmerge.DocumentDeserializer;
import org.openoffice.xmerge.converter.xml.OfficeConstants;
import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
import org.openoffice.xmerge.converter.palm.PalmDB;
import org.openoffice.xmerge.converter.palm.Record;
import org.openoffice.xmerge.converter.palm.PalmDocument;
import org.openoffice.xmerge.util.Debug;
/**
* <p>AportisDoc implementation of <code>DocumentDeserializer</code>
* for the {@link
* org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
* PluginFactoryImpl}.</p>
*
* <p>This converts a file in AportisDoc PDB format to StarOffice
* XML format.</p>
*
* <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
* to read the AportisDoc format into a <code>String</code> object, then
* it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
* object from it.</p>
*
* @author Herbie Ong
*/
public final class DocumentDeserializerImpl
    implements OfficeConstants, DocConstants, DocumentDeserializer {

    /** A <code>ConvertData</code> object assigned to this object. */
    private final ConvertData cd;


    /**
     *  Constructor that assigns the given <code>ConvertData</code>
     *  to this object as input.
     *
     *  @param  cd  A <code>ConvertData</code> object to read data for
     *              the conversion process by the <code>deserialize</code>
     *              method.
     */
    public DocumentDeserializerImpl(ConvertData cd) {
        this.cd = cd;
    }


    /**
     *  Convert the given <code>ConvertData</code> object
     *  into a <code>SxwDocument</code> object.
     *
     *  <p>Every <code>PalmDocument</code> in the <code>ConvertData</code>
     *  is decoded and converted in turn; only the result of the last
     *  one is returned.</p>
     *
     *  @return  Resulting <code>SxwDocument</code> object, or
     *           <code>null</code> if the <code>ConvertData</code>
     *           enumerates no documents.
     *
     *  @throws  ConvertException   If any conversion error occurs.
     *  @throws  IOException        If any I/O error occurs.
     */
    public Document deserialize() throws IOException, ConvertException {

        Document doc = null;

        Enumeration e = cd.getDocumentEnumeration();
        while (e.hasMoreElements()) {
            PalmDocument palmDoc = (PalmDocument) e.nextElement();
            PalmDB pdb = palmDoc.getPdb();

            log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            log("<AportisDoc>");

            Record[] recs = pdb.getRecords();
            String docName = palmDoc.getName();

            // Decode the PDB records into plain text, then build the DOM.
            DocDecoder decoder = new DocDecoder();
            String text = decoder.parseRecords(recs);
            doc = buildDocument(docName, text);

            log("</AportisDoc>");
        }

        return doc;
    }


    /**
     *  Parses the text content of an AportisDoc format and build a
     *  <code>SxwDocument</code>.
     *
     *  <p>The text is split into paragraph elements at each
     *  <code>EOL_CHAR</code>.  Each <code>TAB_CHAR</code> becomes a
     *  tab-stop element, and a run of two or more
     *  <code>SPACE_CHAR</code> becomes a space element with a count
     *  attribute.  Everything else is emitted as Text nodes.  Empty
     *  text yields a single empty paragraph.</p>
     *
     *  @param  docName  Name of <code>Document</code>.
     *  @param  str      Text content of AportisDoc format.
     *
     *  @return  Resulting <code>SxwDocument</code> object.
     *
     *  @throws  IOException  If any I/O error occurs, or if the
     *                        content DOM has no body element to
     *                        append paragraphs to.
     */
    private SxwDocument buildDocument(String docName, String str)
        throws IOException {

        // create minimum office xml document.
        SxwDocument sxwDoc = new SxwDocument(docName);
        sxwDoc.initContentDOM();

        org.w3c.dom.Document doc = sxwDoc.getContentDOM();

        // Grab hold of the office:body tag.
        // This is where top level paragraphs will append to.
        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
        Node bodyNode = list.item(0);
        if (bodyNode == null) {
            // NodeList.item returns null when there is no such element;
            // fail with a clear message instead of a later NPE.
            throw new IOException("Content DOM has no "
                                  + TAG_OFFICE_BODY + " element");
        }

        // Store all the text in a character array.
        char[] text = str.toCharArray();

        // startIndex has 2 purposes:
        // if value is -1, it means that there are no text characters
        // needed to be processed for a Text node.  if value >= 0, it
        // is the index of the starting position of a text section
        // for a Text node.
        int startIndex = -1;

        // Create a paragraph node to start with.
        Element paraNode = doc.createElement(TAG_PARAGRAPH);

        log("<PARA>");

        for (int i = 0; i < text.length; i++) {

            switch (text[i]) {

                case TAB_CHAR: {

                    // Check if there are text to be processed first.
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add tab element.
                    Element tabNode = doc.createElement(TAG_TAB_STOP);
                    paraNode.appendChild(tabNode);

                    log("<TAB/>");
                    break;
                }

                case EOL_CHAR: {

                    // Check if there are text to be processed first.
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add the current paragraph to body.
                    bodyNode.appendChild(paraNode);

                    // Create another paragraph element.
                    paraNode = doc.createElement(TAG_PARAGRAPH);

                    log("</PARA>");
                    log("<PARA>");
                    break;
                }

                case SPACE_CHAR: {

                    // count is the number of ADDITIONAL space chars
                    // that directly follow the one at index i.
                    int count = 0;

                    // Do a look ahead and count the number of space
                    // chars, stopping at the end of the array so that
                    // text ending in a space does not overrun it.
                    while (i + 1 + count < text.length
                           && text[i + 1 + count] == SPACE_CHAR) {
                        count++;
                    }

                    // Need to build a space node ONLY for a run of
                    // two or more spaces (i.e. count > 0).
                    if (count > 0) {

                        // Number of spaces the space element stands for.
                        int spaceCount = count;

                        if (startIndex >= 0) {
                            // Pending text absorbs the first space of
                            // the run (the one at index i).
                            addTextNode(doc, paraNode, text,
                                        startIndex, i);
                            startIndex = -1;
                        } else {
                            // No pending text to carry the first space,
                            // so the space element must account for it
                            // too; otherwise one space would be lost.
                            spaceCount = count + 1;
                        }

                        // Then, create a space element
                        // with the proper attribute.
                        Element spaceNode = doc.createElement(TAG_SPACE);
                        spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
                                               Integer.toString(spaceCount));
                        paraNode.appendChild(spaceNode);

                        // reposition i to the last space character.
                        i += count;

                        log("<SPACE count=\"" + spaceCount + "\" />");

                    } else if (startIndex < 0) {

                        // A single space: if there are no chars for
                        // text node yet, consider this one.
                        startIndex = i;
                        log("<TEXT>");
                    }

                    break;
                }

                default:

                    // If there are no chars for text node yet,
                    // this should be the start.
                    if (startIndex < 0) {
                        startIndex = i;
                        log("<TEXT>");
                    }

                    break;
            }
        }

        // Check if there are text to be processed first.
        if (startIndex >= 0) {
            addTextNode(doc, paraNode, text, startIndex, text.length - 1);
        }

        // Then, add the last paragraph element if it is not added yet.
        // Empty text has no last character to inspect; its (empty)
        // paragraph has not been added, so add it.
        if (text.length == 0 || text[text.length - 1] != EOL_CHAR) {
            bodyNode.appendChild(paraNode);
        }

        log("</PARA>");

        return sxwDoc;
    }


    /**
     *  Add a Text <code>Node</code> to the given paragraph node with the
     *  text starting at the given <code>startPos</code> until
     *  <code>endPos</code>.
     *
     *  @param  doc       <code>org.w3c.dom.Document</code> object for creating
     *                    <code>Node</code> objects.
     *  @param  para      The current paragraph <code>Node</code> to append
     *                    text <code>Node</code>.
     *  @param  text      Array of characters containing text.
     *  @param  startPos  Starting index position for text value.
     *  @param  endPos    End index position for text value (inclusive).
     */
    private void addTextNode(org.w3c.dom.Document doc, Node para, char text[],
                             int startPos, int endPos) {

        // endPos is inclusive, hence the + 1 for the length.
        String str = new String(text, startPos, endPos - startPos + 1);
        Text textNode = doc.createTextNode(str);
        para.appendChild(textNode);

        log(str);
        log("</TEXT>");
    }


    /**
     *  Sends message to the log object.
     *
     *  @param  str  Debug message.
     */
    private void log(String str) {

        Debug.log(Debug.TRACE, str);
    }
}