| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| package org.openoffice.xmerge.converter.xml.sxw.pocketword; |
| |
| import org.openoffice.xmerge.Document; |
| import org.openoffice.xmerge.converter.xml.ParaStyle; |
| import org.openoffice.xmerge.converter.xml.TextStyle; |
| |
| import java.io.InputStream; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.io.ByteArrayOutputStream; |
| import java.io.DataOutputStream; |
| |
| import java.util.Enumeration; |
| import java.util.Vector; |
| |
| |
| /** |
| * <p>Class representing a Pocket Word Document.</p> |
| * |
| * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents |
| * and to read existing data to allow for conversion to OpenOffice.org XML Writer |
| * format.</p> |
| * |
| * @author Mark Murnane |
| * @version 1.1 |
| */ |
| public class PocketWordDocument implements Document, PocketWordConstants { |
| private String docName; |
| |
| private byte[] preamble; |
| private Vector fonts; |
| private DocumentDescriptor descriptor; |
| private Vector paragraphs; |
| |
| private ParaStyle pStyle; |
| private Paragraph currentPara; |
| |
| /* |
| * The trailer currently appears to be constant, but if its found to |
| * have a variable component, then this initialisation should be moved |
| * to an initTrailer() method. |
| * |
| * Padding is sometimes needed before the trailer to ensure the file |
| * ends on a 4-byte boundary, but this is handled in write(). |
| */ |
| private static final byte[] trailer = new byte[] { (byte)0x82, 0x00, |
| 0x09, 0x00, |
| 0x03, 0x00, |
| (byte)0x82, 0x00, |
| 0x00, 0x00, |
| 0x00, 0x00, |
| 0x00, 0x00, |
| 0x00, 0x00, |
| 0x00, 0x00 }; |
| |
| |
| /** |
| * <p>Constructs a new Pocket Word Document.</p> |
| * |
| * <p>This new document does notcontain any information. Document data must |
| * either be added using appropriate methods, or an existing file can be |
| * {@link #read(InputStream) read} from an <code>InputStream</code>.</p> |
| * |
| * @param name The name of the <code>PocketWordDocument</code>. |
| */ |
| public PocketWordDocument(String name) { |
| |
| docName = trimDocumentName(name); |
| |
| preamble = new byte[52]; |
| fonts = new Vector(0, 1); |
| descriptor = new DocumentDescriptor(); |
| paragraphs = new Vector(0, 1); |
| } |
| |
| |
| /** |
| * <p>This method reads <code>byte</code> data from the InputStream and |
| * extracts font and paragraph data from the file.</p> |
| * |
| * @param is InputStream containing a Pocket Word data file. |
| * |
| * @throws IOException In case of any I/O errors. |
| */ |
| public void read(InputStream docData) throws IOException { |
| |
| if (docData == null) { |
| throw new IOException ("No input stream to convert"); |
| } |
| |
| // The preamble may become important for font declarations. |
| int readValue = docData.read(preamble); |
| // #i33702# check for an empty InputStream. |
| if(readValue == -1) { |
| System.err.println("Error:invalid input stream"); |
| return; |
| } |
| |
| byte[] font = new byte[80]; |
| int numfonts = 0; |
| do { |
| docData.read(font); |
| |
| String name = new String(font, 0, 64, "UTF-16LE"); |
| fonts.add(name.trim()); |
| |
| } while (!(font[76] == 5 && font[77] == 0 |
| && font[78] == 1 && font[79] == 0)); |
| |
| /* |
| * TODO: The document descriptor data that follows the fonts ends with |
| * a variable section containing data for each of the paragraphs. |
| * It may be possible to use this information to calculate staring |
| * positions for each paragraph rather than iterating through the |
| * entire byte stream. |
| */ |
| |
| int value; |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(); |
| while ((value = docData.read()) != -1) { |
| bos.write(value); |
| } |
| |
| |
| byte[] contentData = bos.toByteArray(); |
| int start = 0, end = 0; |
| boolean sawMarker = false; |
| |
| for (int i = 0; i < contentData.length; i += 4) { |
| if (contentData[i + 2] == (byte)0xFF |
| && contentData[i + 3] == (byte)0xFF && !sawMarker) { |
| start = i - 8; |
| sawMarker = true; |
| continue; |
| } |
| |
| if (contentData[i + 2] == (byte)0xFF |
| && contentData[i + 3] == (byte)0xFF && sawMarker) { |
| end = i - 8; |
| ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); |
| paragraph.write(contentData, start, end - start); |
| paragraphs.add(new Paragraph(paragraph.toByteArray())); |
| |
| // Reset the markers |
| sawMarker = false; |
| i -= 4; // Skip back |
| } |
| |
| } |
| |
| /* |
| * Special case, the last paragraph |
| * If we got here, and the marker is set then we saw the start of the |
| * last paragraph, but no following paragraph |
| */ |
| ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); |
| if (contentData[contentData.length - 19] == 0) { |
| paragraph.write(contentData, start, contentData.length - start - 20); |
| } |
| else { |
| paragraph.write(contentData, start, contentData.length - start - 18); |
| } |
| paragraphs.add(new Paragraph(paragraph.toByteArray())); |
| } |
| |
| |
| /* |
| * Utility method to make sure the document name is stripped of any file |
| * extensions before use. |
| */ |
| private String trimDocumentName(String name) { |
| String temp = name.toLowerCase(); |
| |
| if (temp.endsWith(FILE_EXTENSION)) { |
| // strip the extension |
| int nlen = name.length(); |
| int endIndex = nlen - FILE_EXTENSION.length(); |
| name = name.substring(0,endIndex); |
| } |
| |
| return name; |
| } |
| |
| |
| /** |
| * <p>Method to provide access to all of the <code>Paragraph</code> objects |
| * in the <code>Document</code>.</p> |
| * |
| * @return <code>Enumeration</code> over the paragraphs in the document. |
| */ |
| public Enumeration getParagraphEnumeration() { |
| return paragraphs.elements(); |
| } |
| |
| |
| /** |
| * <p>Returns the <code>Document</code> name with no file extension.</p> |
| * |
| * @return The <code>Document</code> name with no file extension. |
| */ |
| public String getName() { |
| return docName; |
| } |
| |
| |
| /** |
| * <p>Returns the <code>Document</code> name with file extension.</p> |
| * |
| * @return The <code>Document</code> name with file extension. |
| */ |
| public String getFileName() { |
| return new String(docName + FILE_EXTENSION); |
| } |
| |
| |
| /** |
| * <p>Writes out the <code>Document</code> content to the specified |
| * <code>OutputStream</code>.</p> |
| * |
| * <p>This method may not be thread-safe. |
| * Implementations may or may not synchronize this |
| * method. User code (i.e. caller) must make sure that |
| * calls to this method are thread-safe.</p> |
| * |
| * @param os <code>OutputStream</code> to write out the |
| * <code>Document</code> content. |
| * |
| * @throws IOException If any I/O error occurs. |
| */ |
| public void write(OutputStream os) throws IOException { |
| DataOutputStream dos = new DataOutputStream(os); |
| |
| initPreamble(); |
| dos.write(preamble); |
| |
| loadFonts(); |
| for (int i = 0; i < fonts.size(); i++ ) { |
| ByteArrayOutputStream fontData = (ByteArrayOutputStream)fonts.elementAt(i); |
| dos.write(fontData.toByteArray()); |
| } |
| |
| |
| for (int i = 0; i < paragraphs.size(); i++) { |
| Paragraph para = (Paragraph)paragraphs.elementAt(i); |
| descriptor.addParagraph((short)para.getTextLength(), para.getLines()); |
| } |
| dos.write(descriptor.getDescriptor()); |
| |
| for (int i = 0; i < paragraphs.size(); i++ ) { |
| Paragraph para = (Paragraph)paragraphs.elementAt(i); |
| |
| // Last paragraph has some extra data |
| if (i + 1 == paragraphs.size()) { |
| para.setLastParagraph(true); |
| } |
| dos.write(para.getParagraphData()); |
| } |
| |
| |
| /* |
| * Before we write out the trailer, we need to make sure that it will |
| * lead to the file ending on a 4 byte boundary. |
| */ |
| if (dos.size() % 4 == 0) { |
| dos.write((byte)0x00); |
| dos.write((byte)0x00); |
| } |
| |
| dos.write(trailer); |
| |
| dos.flush(); |
| dos.close(); |
| } |
| |
| |
| /** |
| * <p>This method adds a new paragraph element to the document. No string |
| * data is added to the paragraph.</p> |
| * |
| * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and |
| * is used as the target for all subsequent calls to addParagraphData().</p> |
| * |
| * @param style Paragraph Style object describing the formatting for |
| * the new paragraph. Can be null. |
| * @param listElement true if this paragraph is to be bulleted; |
| * false otherwise. |
| */ |
| public void addParagraph(ParaStyle style, boolean listElement) { |
| /* For the moment, only support basic text entry in a single paragraph */ |
| Paragraph para = new Paragraph(style); |
| |
| paragraphs.add(para); |
| |
| pStyle = style; |
| currentPara = para; |
| |
| if (listElement) { |
| para.setBullets(true); |
| } |
| } |
| |
| |
| /** |
| * <p>This method adds text to the current paragraph.</p> |
| * |
| * <p>If no paragraphs exist within the document, it creates one.</p> |
| * |
| * @param data The string data for this segment. |
| * @param style Text Style object describing the formatting of this |
| * segment. Can be null. |
| */ |
| public void addParagraphData(String data, TextStyle style) { |
| if (currentPara == null) { |
| addParagraph(null, false); |
| } |
| currentPara.addTextSegment(data, style); |
| } |
| |
| |
| /* |
| * Preamble is the portion before font specification which never |
| * seems to change from one file, or one saved version, to the next. |
| * |
| * Bytes 18h and 19h seem to contain the number of fonts and should |
| * be modified when all of the fonts have been specified. |
| * These bytes are the first two on the fourth line below. |
| */ |
| private void initPreamble() { |
| preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00, |
| 0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00, // Bytes 3-4 Font?? |
| 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bytes 1-2 # Fonts |
| 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, |
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x00, 0x00 }; |
| } |
| |
| |
| /* |
| * This method writes the minimum font data that is used by the converter. |
| * Currently, all documents convert to 10 point Courier New. Tahoma is |
| * always mentioned in Pocket Word files, however, even if it is not used. |
| * |
| * TODO: Rewrite to allow for multiple fonts once font support issues |
| * have been resolved. |
| */ |
| private void loadFonts() { |
| ByteArrayOutputStream fontData = new ByteArrayOutputStream(); |
| |
| try { |
| fontData.write(new String("Tahoma").getBytes("UTF-16LE")); |
| fontData.write(new byte[52]); // Rest of font name? |
| fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } ); |
| fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } ); |
| fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); |
| fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); |
| |
| fonts.add(fontData); |
| |
| fontData = new ByteArrayOutputStream(); |
| |
| fontData.write(new String("Courier New").getBytes("UTF-16LE")); |
| fontData.write(new byte[42]); |
| fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } ); |
| fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } ); |
| fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } ); |
| |
| // Next part indicates that this is the last font |
| fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } ); |
| |
| fonts.add(fontData); |
| } |
| catch (IOException ioe) { |
| // Shouldn't happen as this is a memory based stream |
| } |
| } |
| } |