| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| package org.openoffice.xmerge.converter.xml; |
| |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.io.Reader; |
| import java.io.BufferedReader; |
| import java.io.StringReader; |
| import java.io.StringWriter; |
| import java.io.InputStreamReader; |
| import java.io.ByteArrayOutputStream; |
| import java.io.ByteArrayInputStream; |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.HashMap; |
| |
| import javax.xml.parsers.DocumentBuilderFactory; |
| import javax.xml.parsers.DocumentBuilder; |
| import javax.xml.parsers.ParserConfigurationException; |
| |
| import org.w3c.dom.Node; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.DOMImplementation; |
| import org.w3c.dom.DocumentType; |
| import org.w3c.dom.NodeList; |
| import org.xml.sax.InputSource; |
| import org.w3c.dom.NamedNodeMap; |
| import org.xml.sax.SAXException; |
| |
| import javax.xml.transform.*; |
| import javax.xml.transform.dom.*; |
| import javax.xml.transform.stream.*; |
| |
| import org.openoffice.xmerge.util.Resources; |
| import org.openoffice.xmerge.util.Debug; |
| |
| /** |
| * An implementation of <code>Document</code> for |
| * StarOffice documents. |
| */ |
| public abstract class OfficeDocument |
| implements org.openoffice.xmerge.Document, |
| OfficeConstants { |
| |
| /** Factory for <code>DocumentBuilder</code> objects. */ |
| private static DocumentBuilderFactory factory = |
| DocumentBuilderFactory.newInstance(); |
| |
| /** DOM <code>Document</code> of content.xml. */ |
| private Document contentDoc = null; |
| |
| /** DOM <code>Document</code> of meta.xml. */ |
| private Document metaDoc = null; |
| |
| /** DOM <code>Document</code> of settings.xml. */ |
| private Document settingsDoc = null; |
| |
| /** DOM <code>Document</code> of styles.xml. */ |
| private Document styleDoc = null; |
| |
| /** DOM <code>Document</code> of META-INF/manifest.xml. */ |
| private Document manifestDoc = null; |
| |
| private String documentName = null; |
| private String fileName = null; |
| |
| /** Resources object. */ |
| private Resources res = null; |
| |
| /** |
| * <code>OfficeZip</code> object to store zip contents from |
| * read <code>InputStream</code>. Note that this member |
| * will still be null if it was initialized using a template |
| * file instead of reading from a StarOffice zipped |
| * XML file. |
| */ |
| private OfficeZip zip = null; |
| |
| /** Collection to keep track of the embedded objects in the document. */ |
| private Map embeddedObjects = null; |
| |
/**
 * Creates a document with the given name, delegating to the
 * three-argument constructor with <code>namespaceAware</code> set to
 * <code>true</code> and <code>validating</code> set to
 * <code>false</code>.
 *
 * @param  name  <code>Document</code> name (may or may not
 *               contain extension).
 */
public OfficeDocument(String name) {
    this(name, true, false);
}
| |
| |
| /** |
| * Constructor with arguments to set <code>namespaceAware</code> |
| * and <code>validating</code> flags. |
| * |
| * @param name <code>Document</code> name (may or may not |
| * contain extension). |
| * @param namespaceAware Value for <code>namespaceAware</code> flag. |
| * @param validating Value for <code>validating</code> flag. |
| */ |
| public OfficeDocument(String name, boolean namespaceAware, boolean validating) { |
| |
| res = Resources.getInstance(); |
| factory.setValidating(validating); |
| factory.setNamespaceAware(namespaceAware); |
| this.documentName = trimDocumentName(name); |
| this.fileName = documentName + getFileExtension(); |
| } |
| |
| |
| /** |
| * Removes the file extension from the <code>Document</code> |
| * name. |
| * |
| * @param name Full <code>Document</code> name with extension. |
| * |
| * @return Name of <code>Document</code> without the extension. |
| */ |
| private String trimDocumentName(String name) { |
| String temp = name.toLowerCase(); |
| String ext = getFileExtension(); |
| |
| if (temp.endsWith(ext)) { |
| // strip the extension |
| int nlen = name.length(); |
| int endIndex = nlen - ext.length(); |
| name = name.substring(0,endIndex); |
| } |
| |
| return name; |
| } |
| |
| |
| /** |
| * Return a DOM <code>Document</code> object of the content.xml |
| * file. Note that a content DOM is not created when the constructor |
| * is called. So, either the <code>read</code> method or the |
| * <code>initContentDOM</code> method will need to be called ahead |
| * on this object before calling this method. |
| * |
| * @return DOM <code>Document</code> object. |
| */ |
| public Document getContentDOM() { |
| |
| return contentDoc; |
| } |
| |
| /** |
| * Return a DOM <code>Document</code> object of the meta.xml |
| * file. Note that a content DOM is not created when the constructor |
| * is called. So, either the <code>read</code> method or the |
| * <code>initContentDOM</code> method will need to be called ahead |
| * on this object before calling this method. |
| * |
| * @return DOM <code>Document</code> object. |
| */ |
| public Document getMetaDOM() { |
| |
| return metaDoc; |
| } |
| |
| |
| /** |
| * Return a DOM <code>Document</code> object of the settings.xml |
| * file. Note that a content DOM is not created when the constructor |
| * is called. So, either the <code>read</code> method or the |
| * <code>initContentDOM</code> method will need to be called ahead |
| * on this object before calling this method. |
| * |
| * @return DOM <code>Document</code> object. |
| */ |
| public Document getSettingsDOM() { |
| |
| return settingsDoc; |
| } |
| |
| |
| /** |
| * Sets the content tree of the document. |
| * |
| * @param newDom <code>Node</code> containing the new content tree. |
| */ |
| public void setContentDOM( Node newDom) { |
| contentDoc = (Document)newDom; |
| } |
| |
| |
| /** |
| * Sets the meta tree of the document. |
| * |
| * @param newDom <code>Node</code> containing the new meta tree. |
| */ |
| public void setMetaDOM (Node newDom) { |
| metaDoc = (Document)newDom; |
| } |
| |
| |
| /** |
| * Sets the settings tree of the document. |
| * |
| * @param newDom <code>Node</code> containing the new settings tree. |
| */ |
| public void setSettingsDOM (Node newDom) { |
| settingsDoc = (Document)newDom; |
| } |
| |
| |
| /** |
| * Sets the style tree of the document. |
| * |
| * @param newDom <code>Node</code> containing the new style tree. |
| */ |
| public void setStyleDOM (Node newDom) { |
| styleDoc = (Document)newDom; |
| } |
| |
| |
| /** |
| * Return a DOM <code>Document</code> object of the style.xml file. |
| * Note that this may return null if there is no style DOM. |
| * Note that a style DOM is not created when the constructor |
| * is called. Depending on the <code>InputStream</code>, a |
| * <code>read</code> method may or may not build a style DOM. When |
| * creating a new style DOM, call the <code>initStyleDOM</code> method |
| * first. |
| * |
| * @return DOM <code>Document</code> object. |
| */ |
| public Document getStyleDOM() { |
| |
| return styleDoc; |
| } |
| |
| |
| /** |
| * Return the name of the <code>Document</code>. |
| * |
| * @return The name of <code>Document</code>. |
| */ |
| public String getName() { |
| |
| return documentName; |
| } |
| |
| |
| /** |
| * Return the file name of the <code>Document</code>, possibly |
| * with the standard extension. |
| * |
| * @return The file name of <code>Document</code>. |
| */ |
| public String getFileName() { |
| |
| return fileName; |
| } |
| |
| |
| /** |
| * Returns the file extension for this type of |
| * <code>Document</code>. |
| * |
| * @return The file extension of <code>Document</code>. |
| */ |
| protected abstract String getFileExtension(); |
| |
| |
| /** |
| * Returns all the embedded objects (graphics, formulae, etc.) present in |
| * this document. |
| * |
| * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects. |
| */ |
| public Iterator getEmbeddedObjects() { |
| |
| if (embeddedObjects == null && manifestDoc != null) { |
| embeddedObjects = new HashMap(); |
| |
| // Need to read the manifest file and construct a list of objects |
| NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE); |
| |
| // Dont create the HashMap if there are no embedded objects |
| int len = nl.getLength(); |
| for (int i = 0; i < len; i++) { |
| Node n = nl.item(i); |
| |
| NamedNodeMap attrs = n.getAttributes(); |
| |
| String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue(); |
| String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue(); |
| |
| |
| /* |
| * According to OpenOffice.org XML File Format document (ver. 1) |
| * there are only two types of embedded object: |
| * |
| * Objects with an XML representation. |
| * Objects without an XML representation. |
| * |
| * The former are represented by one or more XML files. |
| * The latter are in binary form. |
| */ |
| if (type.startsWith("application/vnd.sun.xml")) |
| { |
| if (path.equals("/")) { |
| // Exclude the main document entries |
| continue; |
| } |
| // Take off the trailing '/' |
| String name = path.substring(0, path.length() - 1); |
| embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip)); |
| } |
| else if (type.equals("text/xml")) { |
| // XML entries are either embedded StarOffice doc entries or main |
| // document entries |
| continue; |
| } |
| else { // FIX (HJ): allows empty MIME type |
| embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip)); |
| } |
| } |
| } |
| |
| return embeddedObjects.values().iterator(); |
| } |
| |
| /** |
| * Returns the embedded object corresponding to the name provided. |
| * The name should be stripped of any preceding path characters, such as |
| * '/', '.' or '#'. |
| * |
| * @param name The name of the embedded object to retrieve. |
| * |
| * @return An <code>EmbeddedObject</code> instance representing the named |
| * object. |
| */ |
| public EmbeddedObject getEmbeddedObject(String name) { |
| if (name == null) { |
| return null; |
| } |
| |
| if (embeddedObjects == null) { |
| getEmbeddedObjects(); |
| } |
| |
| if (embeddedObjects.containsKey(name)) { |
| return (EmbeddedObject)embeddedObjects.get(name); |
| } |
| else { |
| return null; |
| } |
| } |
| |
| |
| /** |
| * Adds a new embedded object to the document. |
| * |
| * @param embObj An instance of <code>EmbeddedObject</code>. |
| */ |
| public void addEmbeddedObject(EmbeddedObject embObj) { |
| if (embObj == null) { |
| return; |
| } |
| |
| if (embeddedObjects == null) { |
| embeddedObjects = new HashMap(); |
| } |
| |
| embeddedObjects.put(embObj.getName(), embObj); |
| } |
| |
| |
| /** |
| * Read the Office <code>Document</code> from the given |
| * <code>InputStream</code>. |
| * |
| * @param is Office document <code>InputStream</code>. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public void read(InputStream is) throws IOException { |
| |
| Debug.log(Debug.INFO, "reading Office file"); |
| |
| DocumentBuilder builder = null; |
| |
| try { |
| builder = factory.newDocumentBuilder(); |
| } catch (ParserConfigurationException ex) { |
| throw new OfficeDocumentException(ex); |
| } |
| |
| // read in Office zip file format |
| |
| zip = new OfficeZip(); |
| zip.read(is); |
| |
| // grab the content.xml and |
| // parse it into contentDoc. |
| |
| byte contentBytes[] = zip.getContentXMLBytes(); |
| |
| if (contentBytes == null) { |
| |
| throw new OfficeDocumentException("Entry content.xml not found in file"); |
| } |
| |
| try { |
| |
| contentDoc = parse(builder, contentBytes); |
| |
| } catch (SAXException ex) { |
| |
| throw new OfficeDocumentException(ex); |
| } |
| |
| // if style.xml exists, grab the style.xml |
| // parse it into styleDoc. |
| |
| byte styleBytes[] = zip.getStyleXMLBytes(); |
| |
| if (styleBytes != null) { |
| |
| try { |
| |
| styleDoc = parse(builder, styleBytes); |
| |
| } catch (SAXException ex) { |
| |
| throw new OfficeDocumentException(ex); |
| } |
| } |
| |
| byte metaBytes[] = zip.getMetaXMLBytes(); |
| |
| if (metaBytes != null) { |
| |
| try { |
| |
| metaDoc = parse(builder, metaBytes); |
| |
| } catch (SAXException ex) { |
| |
| throw new OfficeDocumentException(ex); |
| } |
| } |
| |
| byte settingsBytes[] = zip.getSettingsXMLBytes(); |
| |
| if (settingsBytes != null) { |
| |
| try { |
| |
| settingsDoc = parse(builder, settingsBytes); |
| |
| } catch (SAXException ex) { |
| |
| throw new OfficeDocumentException(ex); |
| } |
| } |
| |
| |
| // Read in the META-INF/manifest.xml file |
| byte manifestBytes[] = zip.getManifestXMLBytes(); |
| |
| if (manifestBytes != null) { |
| |
| try { |
| manifestDoc = parse(builder, manifestBytes); |
| } catch (SAXException ex) { |
| throw new OfficeDocumentException(ex); |
| } |
| } |
| |
| } |
| |
| |
| /** |
| * Read the Office <code>Document</code> from the given |
| * <code>InputStream</code>. |
| * |
| * @param is Office document <code>InputStream</code>. |
| * @param isZip <code>boolean</code> Identifies whether |
| * a file is zipped or not |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public void read(InputStream is, boolean isZip) throws IOException { |
| |
| Debug.log(Debug.INFO, "reading Office file"); |
| |
| DocumentBuilder builder = null; |
| |
| try { |
| builder = factory.newDocumentBuilder(); |
| } catch (ParserConfigurationException ex) { |
| throw new OfficeDocumentException(ex); |
| } |
| |
| if (isZip) |
| { |
| read(is); |
| } |
| else{ |
| try{ |
| //System.out.println("\nParsing Input stream, validating?: "+builder.isValidating()); |
| //contentDoc= builder.parse((InputStream)is); |
| |
| Reader r = secondHack(is); |
| InputSource ins = new InputSource(r); |
| org.w3c.dom.Document newDoc = builder.parse(ins); |
| //org.w3c.dom.Document newDoc = builder.parse((InputStream)is); |
| Element rootElement=newDoc.getDocumentElement(); |
| |
| NodeList nodeList; |
| Node tmpNode; |
| Node rootNode = (Node)rootElement; |
| if (newDoc !=null){ |
| /*content*/ |
| contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT); |
| rootElement=contentDoc.getDocumentElement(); |
| rootNode = (Node)rootElement; |
| |
| // FIX (HJ): Include office:font-decls in content DOM |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); |
| if (nodeList.getLength()>0){ |
| tmpNode = contentDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); |
| if (nodeList.getLength()>0){ |
| tmpNode = contentDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY); |
| if (nodeList.getLength()>0){ |
| tmpNode = contentDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| /*Styles*/ |
| styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES); |
| rootElement=styleDoc.getDocumentElement(); |
| rootNode = (Node)rootElement; |
| |
| // FIX (HJ): Include office:font-decls in styles DOM |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); |
| if (nodeList.getLength()>0){ |
| tmpNode = styleDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES); |
| if (nodeList.getLength()>0){ |
| tmpNode = styleDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| // FIX (HJ): Include office:automatic-styles in styles DOM |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); |
| if (nodeList.getLength()>0){ |
| tmpNode = styleDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| // FIX (HJ): Include office:master-styles in styles DOM |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); |
| if (nodeList.getLength()>0){ |
| tmpNode = styleDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| /*Settings*/ |
| settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS); |
| rootElement=settingsDoc.getDocumentElement(); |
| rootNode = (Node)rootElement; |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS); |
| if (nodeList.getLength()>0){ |
| tmpNode = settingsDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| /*Meta*/ |
| metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META); |
| rootElement=metaDoc.getDocumentElement(); |
| rootNode = (Node)rootElement; |
| nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META); |
| if (nodeList.getLength()>0){ |
| tmpNode = metaDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| } |
| } |
| catch (SAXException ex) { |
| throw new OfficeDocumentException(ex); |
| } |
| } |
| |
| } |
| |
| |
| |
| /** |
| * Parse given <code>byte</code> array into a DOM |
| * <code>Document</code> object using the |
| * <code>DocumentBuilder</code> object. |
| * |
| * @param builder <code>DocumentBuilder</code> object for parsing. |
| * @param bytes <code>byte</code> array for parsing. |
| * |
| * @return Resulting DOM <code>Document</code> object. |
| * |
| * @throws SAXException If any parsing error occurs. |
| */ |
| static Document parse(DocumentBuilder builder, byte bytes[]) |
| throws SAXException, IOException { |
| |
| Document doc = null; |
| |
| ByteArrayInputStream is = new ByteArrayInputStream(bytes); |
| |
| // TODO: replace hack with a more appropriate fix. |
| |
| Reader r = hack(is); |
| InputSource ins = new InputSource(r); |
| doc = builder.parse(ins); |
| |
| return doc; |
| } |
| |
| |
| /** |
| * Method to return the MIME type of the document. |
| * |
| * @return String The document's MIME type. |
| */ |
| protected abstract String getDocumentMimeType(); |
| |
| |
| /** |
| * Write out Office ZIP file format. |
| * |
| * @param os XML <code>OutputStream</code>. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public void write(OutputStream os) throws IOException { |
| if (zip == null) { |
| zip = new OfficeZip(); |
| } |
| |
| initManifestDOM(); |
| |
| Element domEntry; |
| Element manifestRoot = manifestDoc.getDocumentElement(); |
| |
| // The EmbeddedObjects come first. |
| Iterator embObjs = getEmbeddedObjects(); |
| while (embObjs.hasNext()) { |
| EmbeddedObject obj = (EmbeddedObject)embObjs.next(); |
| obj.writeManifestData(manifestDoc); |
| |
| obj.write(zip); |
| } |
| |
| // Add in the entry for the Pictures directory. Always present. |
| domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "Pictures/"); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, ""); |
| manifestRoot.appendChild(domEntry); |
| |
| // Write content to the Zip file and then write any of the optional |
| // data, if it exists. |
| zip.setContentXMLBytes(docToBytes(contentDoc)); |
| |
| domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "content.xml"); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); |
| |
| manifestRoot.appendChild(domEntry); |
| |
| if (styleDoc != null) { |
| zip.setStyleXMLBytes(docToBytes(styleDoc)); |
| |
| domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "styles.xml"); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); |
| manifestRoot.appendChild(domEntry); |
| } |
| |
| if (metaDoc != null) { |
| zip.setMetaXMLBytes(docToBytes(metaDoc)); |
| |
| domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "meta.xml"); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); |
| manifestRoot.appendChild(domEntry); |
| } |
| |
| if (settingsDoc != null) { |
| zip.setSettingsXMLBytes(docToBytes(settingsDoc)); |
| |
| domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "settings.xml"); |
| domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); |
| manifestRoot.appendChild(domEntry); |
| } |
| |
| zip.setManifestXMLBytes(docToBytes(manifestDoc)); |
| |
| zip.write(os); |
| } |
| |
| |
| /** |
| * Write out Office ZIP file format. |
| * |
| * @param os XML <code>OutputStream</code>. |
| * @param isZip <code>boolean</code> |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public void write(OutputStream os, boolean isZip) throws IOException { |
| |
| // Create an OfficeZip object if one does not exist. |
| if (isZip){ |
| write(os); |
| } |
| else{ |
| try{ |
| DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); |
| DocumentBuilder builder= builderFactory.newDocumentBuilder(); |
| DOMImplementation domImpl = builder.getDOMImplementation(); |
| DocumentType docType =domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null); |
| org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null); |
| |
| |
| Element rootElement=newDoc.getDocumentElement(); |
| rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office"); |
| rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" ); |
| rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text"); |
| rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table"); |
| |
| rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing"); |
| rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" ); |
| rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" ); |
| rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" ); |
| rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" ); |
| rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" ); |
| rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" ); |
| rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" ); |
| rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" ); |
| rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" ); |
| rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" ); |
| rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" ); |
| rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" ); |
| // #i41033# OASIS format needs the "office:class" set. |
| if(getDocumentMimeType() == SXC_MIME_TYPE) |
| rootElement.setAttribute("office:class","spreadsheet" ); |
| else if(getDocumentMimeType() == SXW_MIME_TYPE) |
| rootElement.setAttribute("office:class","text" ); |
| rootElement.setAttribute("office:version","1.0"); |
| |
| |
| NodeList nodeList; |
| Node tmpNode; |
| Node rootNode = (Node)rootElement; |
| if (metaDoc !=null){ |
| nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META); |
| if (nodeList.getLength()>0){ |
| tmpNode = newDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| }if (styleDoc !=null){ |
| nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES); |
| if (nodeList.getLength()>0){ |
| tmpNode = newDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| }if (settingsDoc !=null){ |
| nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS); |
| if (nodeList.getLength()>0){ |
| tmpNode = newDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| } |
| if (contentDoc !=null){ |
| nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); |
| if (nodeList.getLength()>0){ |
| tmpNode = newDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| |
| nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY); |
| if (nodeList.getLength()>0){ |
| tmpNode = newDoc.importNode(nodeList.item(0),true); |
| rootNode.appendChild(tmpNode); |
| } |
| } |
| |
| byte contentBytes[] = docToBytes(newDoc); |
| //System.out.println(new String(contentBytes)); |
| os.write(contentBytes); |
| } |
| catch(Exception exc){ |
| System.out.println("\nException in OfficeDocument.write():" +exc); |
| } |
| //byte contentBytes[] = docToBytes(contentDoc); |
| } |
| } |
| |
| |
| /** |
| * <p>Write out a <code>org.w3c.dom.Document</code> object into a |
| * <code>byte</code> array.</p> |
| * |
| * <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument |
| * package!</p> |
| * |
| * @param doc DOM <code>Document</code> object. |
| * |
| * @return <code>byte</code> array of DOM <code>Document</code> |
| * object. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| static byte[] docToBytes(Document doc) |
| throws IOException { |
| |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| |
| java.lang.reflect.Constructor con; |
| java.lang.reflect.Method meth; |
| |
| String domImpl = doc.getClass().getName(); |
| |
| /* |
| * We may have multiple XML parsers in the Classpath. |
| * Depending on which one is first, the actual type of |
| * doc may vary. Need a way to find out which API is being |
| * used and use an appropriate serialization method. |
| */ |
| |
| try { |
| // First of all try for JAXP 1.0 |
| if (domImpl.equals("com.sun.xml.tree.XmlDocument")) { |
| |
| Debug.log(Debug.INFO, "Using JAXP"); |
| |
| Class jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument"); |
| |
| // The method is in the XMLDocument class itself, not a helper |
| meth = jaxpDoc.getMethod("write", |
| new Class[] { Class.forName("java.io.OutputStream") } ); |
| |
| meth.invoke(doc, new Object [] { baos } ); |
| } |
| else if (domImpl.equals("org.apache.crimson.tree.XmlDocument")) |
| { |
| Debug.log(Debug.INFO, "Using Crimson"); |
| |
| Class crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument"); |
| // The method is in the XMLDocument class itself, not a helper |
| meth = crimsonDoc.getMethod("write", |
| new Class[] { Class.forName("java.io.OutputStream") } ); |
| |
| meth.invoke(doc, new Object [] { baos } ); |
| } |
| else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl") |
| || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) { |
| |
| Debug.log(Debug.INFO, "Using Xerces"); |
| |
| // Try for Xerces |
| Class xercesSer = |
| Class.forName("org.apache.xml.serialize.XMLSerializer"); |
| |
| // Get the OutputStream constructor |
| // May want to use the OutputFormat parameter at some stage too |
| con = xercesSer.getConstructor(new Class [] |
| { Class.forName("java.io.OutputStream"), |
| Class.forName("org.apache.xml.serialize.OutputFormat") } ); |
| |
| |
| // Get the serialize method |
| meth = xercesSer.getMethod("serialize", |
| new Class [] { Class.forName("org.w3c.dom.Document") } ); |
| |
| |
| // Get an instance |
| Object serializer = con.newInstance(new Object [] { baos, null } ); |
| |
| |
| // Now call serialize to write the document |
| meth.invoke(serializer, new Object [] { doc } ); |
| } |
| else if (domImpl.equals("gnu.xml.dom.DomDocument")) { |
| Debug.log(Debug.INFO, "Using GNU"); |
| |
| Class gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer"); |
| |
| // Get the serialize method |
| meth = gnuSer.getMethod("serialize", |
| new Class [] { Class.forName("org.w3c.dom.Node"), |
| Class.forName("java.io.OutputStream") } ); |
| |
| // Get an instance |
| Object serializer = gnuSer.newInstance(); |
| |
| // Now call serialize to write the document |
| meth.invoke(serializer, new Object [] { doc, baos } ); |
| } |
| else { |
| try { |
| DOMSource domSource = new DOMSource(doc); |
| StringWriter writer = new StringWriter(); |
| StreamResult result = new StreamResult(writer); |
| TransformerFactory tf = TransformerFactory.newInstance(); |
| Transformer transformer = tf.newTransformer(); |
| transformer.transform(domSource, result); |
| return writer.toString().getBytes(); |
| } |
| catch (Exception e) { |
| // We don't have another parser |
| throw new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl); |
| } |
| } |
| } |
| catch (ClassNotFoundException cnfe) { |
| throw new IOException(cnfe.toString()); |
| } |
| catch (Exception e) { |
| // We may get some other errors, but the bottom line is that |
| // the steps being executed no longer work |
| throw new IOException(e.toString()); |
| } |
| |
| byte bytes[] = baos.toByteArray(); |
| |
| return bytes; |
| } |
| |
| |
| /** |
| * Initializes a new DOM <code>Document</code> with the content |
| * containing minimum OpenOffice XML tags. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public final void initContentDOM() throws IOException { |
| |
| contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT); |
| |
| // this is a work-around for a bug in Office6.0 - not really |
| // needed but StarCalc 6.0 will crash without this tag. |
| Element root = contentDoc.getDocumentElement(); |
| |
| Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS); |
| root.appendChild(child); |
| |
| child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); |
| root.appendChild(child); |
| |
| child = contentDoc.createElement(TAG_OFFICE_BODY); |
| root.appendChild(child); |
| } |
| |
| /** |
| * Initializes a new DOM <code>Document</code> with the content |
| * containing minimum OpenOffice XML tags. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public final void initSettingsDOM() throws IOException { |
| |
| settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS); |
| |
| // this is a work-around for a bug in Office6.0 - not really |
| // needed but StarCalc 6.0 will crash without this tag. |
| Element root = settingsDoc.getDocumentElement(); |
| |
| Element child = settingsDoc.createElement(TAG_OFFICE_SETTINGS); |
| root.appendChild(child); |
| } |
| |
| /** |
| * Initializes a new DOM Document with styles |
| * containing minimum OpenOffice XML tags. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public final void initStyleDOM() throws IOException { |
| |
| styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES); |
| } |
| |
| /** |
| * <p>Creates a new DOM <code>Document</code> containing minimum |
| * OpenOffice.org XML tags.</p> |
| * |
| * <p>This method uses the subclass |
| * <code>getOfficeClassAttribute</code> method to get the |
| * attribute for <i>office:class</i>.</p> |
| * |
| * @param rootName root name of <code>Document</code>. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| private final Document createSettingsDOM(String rootName) throws IOException { |
| |
| Document doc = null; |
| |
| try { |
| |
| DocumentBuilder builder = factory.newDocumentBuilder(); |
| doc = builder.newDocument(); |
| |
| } catch (ParserConfigurationException ex) { |
| |
| throw new OfficeDocumentException(ex); |
| |
| } |
| |
| Element root = (Element) doc.createElement(rootName); |
| doc.appendChild(root); |
| |
| root.setAttribute("xmlns:office", "http://openoffice.org/2000/office"); |
| root.setAttribute("xmlns:xlink", "http://openoffice.org/1999/xlink"); |
| root.setAttribute("xmlns:config", "http://openoffice.org/2001/config"); |
| root.setAttribute("office:version", "1.0"); |
| |
| return doc; |
| } |
| |
| |
| /** |
| * <p>Creates a new DOM <code>Document</code> containing minimum |
| * OpenOffice.org XML tags.</p> |
| * |
| * <p>This method uses the subclass |
| * <code>getOfficeClassAttribute</code> method to get the |
| * attribute for <i>office:class</i>.</p> |
| * |
| * @param rootName root name of <code>Document</code>. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| private final Document createDOM(String rootName) throws IOException { |
| |
| Document doc = null; |
| |
| try { |
| |
| DocumentBuilder builder = factory.newDocumentBuilder(); |
| doc = builder.newDocument(); |
| |
| } catch (ParserConfigurationException ex) { |
| |
| throw new OfficeDocumentException(ex); |
| |
| } |
| |
| Element root = (Element) doc.createElement(rootName); |
| doc.appendChild(root); |
| |
| root.setAttribute("xmlns:office", "http://openoffice.org/2000/office"); |
| root.setAttribute("xmlns:style", "http://openoffice.org/2000/style"); |
| root.setAttribute("xmlns:text", "http://openoffice.org/2000/text"); |
| root.setAttribute("xmlns:table", "http://openoffice.org/2000/table"); |
| root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing"); |
| root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format"); |
| root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink"); |
| root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle"); |
| root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg"); |
| root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart"); |
| root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d"); |
| root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML"); |
| root.setAttribute("xmlns:form", "http://openoffice.org/2000/form"); |
| root.setAttribute("xmlns:script", "http://openoffice.org/2000/script"); |
| root.setAttribute("office:class", getOfficeClassAttribute()); |
| root.setAttribute("office:version", "1.0"); |
| |
| return doc; |
| } |
| |
| |
| /** |
| * Return the <i>office:class</i> attribute value. |
| * |
| * @return The attribute value. |
| */ |
| protected abstract String getOfficeClassAttribute(); |
| |
| |
| /** |
| * <p>Hacked code to filter <!DOCTYPE> tag before |
| * sending stream to parser.</p> |
| * |
| * <p>This hacked code needs to be changed later on.</p> |
| * |
| * <p>Issue: using current jaxp1.0 parser, there is no way |
| * to turn off processing of dtds. Current set of dtds |
| * have bugs, processing them will throw exceptions.</p> |
| * |
| * <p>This is a simple hack that assumes the whole <!DOCTYPE> |
| * tag are all in the same line. This is sufficient for |
| * current StarOffice 6.0 generated XML files. Since this |
| * hack really needs to go away, I don't want to spend |
| * too much time in making it a perfect hack.</p> |
| * FIX (HJ): Removed requirement for DOCTYPE to be in one line |
| * FIX (HJ): No longer removes newlines |
| * |
| * @param is <code>InputStream</code> to be filtered. |
| * |
| * @return Reader value without the <!DOCTYPE> tag. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| private static Reader hack(InputStream is) throws IOException { |
| |
| BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); |
| StringBuffer buffer = new StringBuffer(); |
| |
| String str = null; |
| |
| while ((str = br.readLine()) != null) { |
| |
| int sIndex = str.indexOf("<!DOCTYPE"); |
| |
| if (sIndex > -1) { |
| |
| buffer.append(str.substring(0, sIndex)); |
| |
| int eIndex = str.indexOf('>', sIndex + 8 ); |
| |
| if (eIndex > -1) { |
| |
| buffer.append(str.substring(eIndex + 1, str.length())); |
| // FIX (HJ): Preserve the newline |
| buffer.append("\n"); |
| |
| } else { |
| |
| // FIX (HJ): More than one line. Search for '>' in following lines |
| boolean bOK = false; |
| while ((str = br.readLine())!=null) { |
| eIndex = str.indexOf('>'); |
| if (eIndex>-1) { |
| buffer.append(str.substring(eIndex+1)); |
| // FIX (HJ): Preserve the newline |
| buffer.append("\n"); |
| bOK = true; |
| break; |
| } |
| } |
| |
| if (!bOK) { throw new IOException("Invalid XML"); } |
| } |
| |
| } else { |
| |
| buffer.append(str); |
| // FIX (HJ): Preserve the newline |
| buffer.append("\n"); |
| } |
| } |
| |
| StringReader r = new StringReader(buffer.toString()); |
| return r; |
| } |
| |
| /** |
| * <p>Transform the InputStream to a Reader Stream.</p> |
| * |
| * <p>This hacked code needs to be changed later on.</p> |
| * |
| * <p>Issue: the new oasis input file stream means |
| * that the old input stream fails. see #i33702# </p> |
| * |
| * @param is <code>InputStream</code> to be filtered. |
| * |
| * @return Reader value of the InputStream(). |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| private static Reader secondHack(InputStream is) throws IOException { |
| |
| BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); |
| char[] charArray = new char[4096]; |
| StringBuffer sBuf = new StringBuffer(); |
| int n = 0; |
| while ((n=br.read(charArray, 0, charArray.length)) > 0) |
| sBuf.append(charArray, 0, n); |
| |
| // ensure there is no trailing garbage after the end of the stream. |
| int sIndex = sBuf.lastIndexOf("</office:document>"); |
| sBuf.delete(sIndex, sBuf.length()); |
| sBuf.append("</office:document>"); |
| StringReader r = new StringReader(sBuf.toString()); |
| return r; |
| } |
| |
| |
| /** |
| * Method to create the initial entries in the manifest.xml file stored |
| * in an SX? file. |
| */ |
| private void initManifestDOM() throws IOException { |
| |
| try { |
| DocumentBuilder builder = factory.newDocumentBuilder(); |
| DOMImplementation domImpl = builder.getDOMImplementation(); |
| |
| DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT, |
| "-//OpenOffice.org//DTD Manifest 1.0//EN", |
| "Manifest.dtd"); |
| manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType); |
| } catch (ParserConfigurationException ex) { |
| throw new OfficeDocumentException(ex); |
| } |
| |
| // Add the <manifest:manifest> entry |
| Element manifestRoot = manifestDoc.getDocumentElement(); |
| |
| manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest"); |
| |
| Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE); |
| |
| docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/"); |
| docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType()); |
| |
| manifestRoot.appendChild(docRoot); |
| } |
| } |
| |