AOO410/main/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java - openoffice - Git at Google

 /**************************************************************
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *************************************************************/


 package org.openoffice.xmerge.converter.xml.sxw.pocketword;

 import org.openoffice.xmerge.Document;
 import org.openoffice.xmerge.converter.xml.ParaStyle;
 import org.openoffice.xmerge.converter.xml.TextStyle;

 import java.io.InputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;

 import java.util.Enumeration;
 import java.util.Vector;


 /**
  * <p>Class representing a Pocket Word Document.</p>
  *
  * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
  *    and to read existing data to allow for conversion to OpenOffice.org XML Writer
  *    format.</p>
  *
  * <p>As produced by {@link #write(OutputStream)}, a file consists of a 52-byte
  *    preamble, a table of 80-byte font records, the document descriptor, the
  *    paragraph data, optional padding and an 18-byte trailer.</p>
  *
  * @author  Mark Murnane
  * @version 1.1
  */
 public class PocketWordDocument implements Document, PocketWordConstants {

     /** Size in bytes of the preamble that precedes the font table. */
     private static final int PREAMBLE_LENGTH = 52;

     /** Size in bytes of one record in the font table. */
     private static final int FONT_RECORD_LENGTH = 80;

     /** Number of leading bytes of a font record that are decoded as its name. */
     private static final int FONT_NAME_LENGTH = 64;

     /** Document name with any trailing <code>FILE_EXTENSION</code> removed. */
     private String      docName;

     private byte[] preamble;

     /** Font names (<code>String</code>) collected by {@link #read(InputStream)}. */
     private Vector fonts;

     /** Rebuilt from the paragraph list on every {@link #write(OutputStream)}. */
     private DocumentDescriptor descriptor;

     /** The <code>Paragraph</code> objects of this document, in document order. */
     private Vector paragraphs;

     /** Style passed to the most recent {@link #addParagraph(ParaStyle, boolean)}. */
     private ParaStyle   pStyle;

     /** Target of {@link #addParagraphData(String, TextStyle)}; null until a paragraph is added. */
     private Paragraph   currentPara;

     /*
      * The trailer currently appears to be constant, but if it is found to
      * have a variable component, then this initialisation should be moved
      * to an initTrailer() method.
      *
      * Padding is sometimes needed before the trailer to ensure the file
      * ends on a 4-byte boundary, but this is handled in write().
      */
     private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
                                                              0x09, 0x00,
                                                              0x03, 0x00,
                                                              (byte)0x82, 0x00,
                                                              0x00, 0x00,
                                                              0x00, 0x00,
                                                              0x00, 0x00,
                                                              0x00, 0x00,
                                                              0x00, 0x00 };


     /**
      * <p>Constructs a new Pocket Word Document.</p>
      *
      * <p>This new document does not contain any information.  Document data must
      *    either be added using appropriate methods, or an existing file can be
      *    {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
      *
      * @param   name    The name of the <code>PocketWordDocument</code>.  A
      *                  trailing file extension is removed.  Must not be null.
      */
     public PocketWordDocument(String name) {

         docName = trimDocumentName(name);

         preamble   = new byte[PREAMBLE_LENGTH];
         fonts      = new Vector(0, 1);
         descriptor = new DocumentDescriptor();
         paragraphs = new Vector(0, 1);
     }


     /**
      * <p>This method reads <code>byte</code> data from the InputStream and
      *    extracts font and paragraph data from the file.</p>
      *
      * <p>An empty stream is reported on <code>System.err</code> and otherwise
      *    ignored.  Data that ends part way through the preamble or the font
      *    table, or whose paragraph section is too short to be sliced, is
      *    rejected with an <code>IOException</code>.</p>
      *
      * @param   docData InputStream containing a Pocket Word data file.
      *
      * @throws  IOException     In case of any I/O errors, or if the stream is
      *                          null, truncated or malformed.
      */
     public void read(InputStream docData) throws IOException {

         if (docData == null) {
             throw new IOException ("No input stream to convert");
         }

         // The preamble may become important for font declarations.
         int readValue = readFully(docData, preamble);
         // #i33702# check for an empty InputStream.
         if (readValue == 0) {
             System.err.println("Error:invalid input stream");
             return;
         }
         if (readValue < preamble.length) {
             throw new IOException("Truncated Pocket Word data: incomplete preamble");
         }

         /*
          * Font records follow the preamble.  The last record is flagged by
          * the bytes 05 00 01 00 at offsets 76-79.  Every record must be read
          * completely: a short read would leave stale bytes in the buffer and
          * the terminating marker might then never be seen.
          */
         byte[] font = new byte[FONT_RECORD_LENGTH];
         do {
             if (readFully(docData, font) < font.length) {
                 throw new IOException(
                     "Truncated Pocket Word data: last font marker not found");
             }

             String name = new String(font, 0, FONT_NAME_LENGTH, "UTF-16LE");
             fonts.add(name.trim());

         } while (!(font[76] == 5 && font[77] == 0
                             && font[78] == 1 && font[79] == 0));

         /*
          * TODO:  The document descriptor data that follows the fonts ends with
          *        a variable section containing data for each of the paragraphs.
          *        It may be possible to use this information to calculate starting
          *        positions for each paragraph rather than iterating through the
          *        entire byte stream.
          */

         // Everything that remains: descriptor, paragraphs and trailer.
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
         byte[] chunk = new byte[4096];
         int count;
         while ((count = docData.read(chunk)) != -1) {
             bos.write(chunk, 0, count);
         }

         byte[] contentData = bos.toByteArray();
         int start = 0;
         boolean sawMarker = false;

         /*
          * Walk the content one 4-byte word at a time.  A word whose last two
          * bytes are FF FF marks a paragraph; the paragraph itself begins
          * 8 bytes before that word and runs up to 8 bytes before the next
          * marker word.
          */
         for (int i = 0; i + 3 < contentData.length; i += 4) {
             boolean marker = contentData[i + 2] == (byte)0xFF
                                 && contentData[i + 3] == (byte)0xFF;
             if (!marker) {
                 continue;
             }

             int boundary = i - 8;
             if (boundary < 0) {
                 throw new IOException(
                     "Malformed Pocket Word data: paragraph marker at offset " + i);
             }

             if (sawMarker) {
                 // This marker closes the paragraph opened by the previous one.
                 paragraphs.add(
                     new Paragraph(copyRange(contentData, start, boundary - start)));
             }

             // Every marker opens a paragraph.
             start = boundary;
             sawMarker = true;
         }

         /*
          * Special case, the last paragraph: it is not followed by another
          * marker, so it runs up to the trailer.  The trailer is preceded by
          * two padding bytes when the byte 19 positions from the end is zero.
          */
         if (contentData.length < 19) {
             throw new IOException(
                 "Malformed Pocket Word data: content too short for a trailer");
         }
         int tailLength = (contentData[contentData.length - 19] == 0) ? 20 : 18;
         int lastLength = contentData.length - start - tailLength;
         if (lastLength < 0) {
             throw new IOException(
                 "Malformed Pocket Word data: last paragraph overlaps the trailer");
         }
         paragraphs.add(new Paragraph(copyRange(contentData, start, lastLength)));
     }


     /*
      * Reads from the stream until the buffer is full or the stream ends.
      * Returns the number of bytes stored, which is less than buf.length only
      * if the end of the stream was reached first.
      */
     private static int readFully(InputStream in, byte[] buf) throws IOException {
         int total = 0;
         while (total < buf.length) {
             int n = in.read(buf, total, buf.length - total);
             if (n < 0) {
                 break;
             }
             total += n;
         }
         return total;
     }


     /*
      * Returns a new array holding length bytes of src starting at offset.
      */
     private static byte[] copyRange(byte[] src, int offset, int length) {
         byte[] copy = new byte[length];
         System.arraycopy(src, offset, copy, 0, length);
         return copy;
     }


     /*
      * Utility method to make sure the document name is stripped of any file
      * extensions before use.
      *
      * The extension is matched ignoring case with regionMatches(), which is
      * independent of the default locale and never changes the length of the
      * string being measured.
      */
     private String trimDocumentName(String name) {
         int extLength = FILE_EXTENSION.length();
         int endIndex = name.length() - extLength;

         // A negative endIndex (name shorter than the extension) never matches.
         if (name.regionMatches(true, endIndex, FILE_EXTENSION, 0, extLength)) {
             // strip the extension
             name = name.substring(0, endIndex);
         }

         return name;
     }


     /**
      * <p>Method to provide access to all of the <code>Paragraph</code> objects
      *    in the <code>Document</code>.</p>
      *
      * @return <code>Enumeration</code> over the paragraphs in the document.
      */
     public Enumeration getParagraphEnumeration() {
         return paragraphs.elements();
     }


     /**
      * <p>Returns the <code>Document</code> name with no file extension.</p>
      *
      * @return  The <code>Document</code> name with no file extension.
      */
     public String getName() {
         return docName;
     }


     /**
      * <p>Returns the <code>Document</code> name with file extension.</p>
      *
      * @return  The <code>Document</code> name with file extension.
      */
     public String getFileName() {
         return docName + FILE_EXTENSION;
     }


     /**
      * <p>Writes out the <code>Document</code> content to the specified
      * <code>OutputStream</code>.</p>
      *
      * <p>The font table and the document descriptor are rebuilt on every
      * call, so the method may be called more than once and after
      * {@link #read(InputStream)}.  The stream is flushed and closed before
      * the method returns.</p>
      *
      * <p>This method may not be thread-safe.
      * Implementations may or may not synchronize this
      * method.  User code (i.e. caller) must make sure that
      * calls to this method are thread-safe.</p>
      *
      * @param  os  <code>OutputStream</code> to write out the
      *             <code>Document</code> content.
      *
      * @throws  IOException  If any I/O error occurs.
      */
     public void write(OutputStream os) throws IOException {
         DataOutputStream dos = new DataOutputStream(os);

         initPreamble();
         dos.write(preamble);

         dos.write(loadFonts());

         // Start from an empty descriptor, otherwise the entries added by an
         // earlier write() would be emitted again.
         descriptor = new DocumentDescriptor();
         for (int i = 0; i < paragraphs.size(); i++) {
             Paragraph para = (Paragraph)paragraphs.elementAt(i);
             descriptor.addParagraph((short)para.getTextLength(), para.getLines());
         }
         dos.write(descriptor.getDescriptor());

         for (int i = 0; i < paragraphs.size(); i++ ) {
             Paragraph para = (Paragraph)paragraphs.elementAt(i);

             // Last paragraph has some extra data
             if (i + 1 == paragraphs.size()) {
                 para.setLastParagraph(true);
             }
             dos.write(para.getParagraphData());
         }


         /*
          * Before we write out the trailer, we need to make sure that it will
          * lead to the file ending on a 4 byte boundary.  The trailer is 18
          * bytes long, so two bytes of padding are needed exactly when the
          * data written so far is already aligned.
          */
         if (dos.size() % 4 == 0) {
             dos.write((byte)0x00);
             dos.write((byte)0x00);
         }

         dos.write(trailer);

         dos.flush();
         dos.close();
     }


     /**
      * <p>This method adds a new paragraph element to the document.  No string
      *    data is added to the paragraph.</p>
      *
      * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and
      *    is used as the target for all subsequent calls to addParagraphData().</p>
      *
      * @param   style       Paragraph Style object describing the formatting for
      *                      the new paragraph.  Can be null.
      * @param   listElement true if this paragraph is to be bulleted;
      *                      false otherwise.
      */
     public void addParagraph(ParaStyle style, boolean listElement)  {
         /* For the moment, only support basic text entry in a single paragraph */
         Paragraph para = new Paragraph(style);

         paragraphs.add(para);

         pStyle = style;
         currentPara = para;

         if (listElement) {
             para.setBullets(true);
         }
     }


     /**
      * <p>This method adds text to the current paragraph.</p>
      *
      * <p>If no paragraph has been added to the document yet, it creates one.</p>
      *
      * @param   data        The string data for this segment.
      * @param   style       Text Style object describing the formatting of this
      *                      segment.  Can be null.
      */
     public void addParagraphData(String data, TextStyle style) {
         if (currentPara == null) {
             addParagraph(null, false);
         }
         currentPara.addTextSegment(data, style);
     }


     /*
      * Preamble is the portion before font specification which never
      * seems to change from one file, or one saved version, to the next.
      *
      * Bytes 18h and 19h seem to contain the number of fonts and should
      * be modified when all of the fonts have been specified.
      * These bytes are the first two on the fourth line below.
      */
     private void initPreamble() {
          preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
                                  0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
                                  0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00,  // Bytes 3-4 Font??
                                  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // Bytes 1-2  # Fonts
                                  0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
                                  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                                  0x00, 0x00, 0x00, 0x00 };
     }


     /*
      * This method builds the minimum font data that is used by the converter
      * and returns it as the bytes of the font table: two 80-byte records.
      * Currently, all documents convert to 10 point Courier New.  Tahoma is
      * always mentioned in Pocket Word files, however, even if it is not used.
      *
      * The table is built in a local buffer rather than in the fonts field,
      * which holds the font names collected by read().
      *
      * TODO:    Rewrite to allow for multiple fonts once font support issues
      *          have been resolved.
      */
     private byte[] loadFonts() throws IOException {
         ByteArrayOutputStream fontData = new ByteArrayOutputStream();

         fontData.write("Tahoma".getBytes("UTF-16LE"));
         fontData.write(new byte[52]);       // Rest of font name?
         fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } );
         fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } );
         fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );
         fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );

         fontData.write("Courier New".getBytes("UTF-16LE"));
         fontData.write(new byte[42]);
         fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } );
         fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } );
         fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } );

         // Next part indicates that this is the last font
         fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } );

         return fontData.toByteArray();
     }
 }
	/**************************************************************
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*************************************************************/



	package org.openoffice.xmerge.converter.xml.sxw.pocketword;

	import org.openoffice.xmerge.Document;
	import org.openoffice.xmerge.converter.xml.ParaStyle;
	import org.openoffice.xmerge.converter.xml.TextStyle;

	import java.io.InputStream;
	import java.io.IOException;
	import java.io.OutputStream;
	import java.io.ByteArrayOutputStream;
	import java.io.DataOutputStream;

	import java.util.Enumeration;
	import java.util.Vector;


	/**
	 * <p>Class representing a Pocket Word Document.</p>
	 *
	 * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
	 *    and to read existing data to allow for conversion to OpenOffice.org XML Writer
	 *    format.</p>
	 *
	 * <p>As produced by {@link #write(OutputStream)}, a file consists of a 52-byte
	 *    preamble, a table of 80-byte font records, the document descriptor, the
	 *    paragraph data, optional padding and an 18-byte trailer.</p>
	 *
	 * @author  Mark Murnane
	 * @version 1.1
	 */
	public class PocketWordDocument implements Document, PocketWordConstants {

	    /** Size in bytes of the preamble that precedes the font table. */
	    private static final int PREAMBLE_LENGTH = 52;

	    /** Size in bytes of one record in the font table. */
	    private static final int FONT_RECORD_LENGTH = 80;

	    /** Number of leading bytes of a font record that are decoded as its name. */
	    private static final int FONT_NAME_LENGTH = 64;

	    /** Document name with any trailing <code>FILE_EXTENSION</code> removed. */
	    private String docName;

	    private byte[] preamble;

	    /** Font names (<code>String</code>) collected by {@link #read(InputStream)}. */
	    private Vector fonts;

	    /** Rebuilt from the paragraph list on every {@link #write(OutputStream)}. */
	    private DocumentDescriptor descriptor;

	    /** The <code>Paragraph</code> objects of this document, in document order. */
	    private Vector paragraphs;

	    /** Style passed to the most recent {@link #addParagraph(ParaStyle, boolean)}. */
	    private ParaStyle pStyle;

	    /** Target of {@link #addParagraphData(String, TextStyle)}; null until a paragraph is added. */
	    private Paragraph currentPara;

	    /*
	     * The trailer currently appears to be constant, but if it is found to
	     * have a variable component, then this initialisation should be moved
	     * to an initTrailer() method.
	     *
	     * Padding is sometimes needed before the trailer to ensure the file
	     * ends on a 4-byte boundary, but this is handled in write().
	     */
	    private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
	                                                       0x09, 0x00,
	                                                       0x03, 0x00,
	                                                       (byte)0x82, 0x00,
	                                                       0x00, 0x00,
	                                                       0x00, 0x00,
	                                                       0x00, 0x00,
	                                                       0x00, 0x00,
	                                                       0x00, 0x00 };


	    /**
	     * <p>Constructs a new Pocket Word Document.</p>
	     *
	     * <p>This new document does not contain any information.  Document data must
	     *    either be added using appropriate methods, or an existing file can be
	     *    {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
	     *
	     * @param   name    The name of the <code>PocketWordDocument</code>.  A
	     *                  trailing file extension is removed.  Must not be null.
	     */
	    public PocketWordDocument(String name) {

	        docName = trimDocumentName(name);

	        preamble   = new byte[PREAMBLE_LENGTH];
	        fonts      = new Vector(0, 1);
	        descriptor = new DocumentDescriptor();
	        paragraphs = new Vector(0, 1);
	    }


	    /**
	     * <p>This method reads <code>byte</code> data from the InputStream and
	     *    extracts font and paragraph data from the file.</p>
	     *
	     * <p>An empty stream is reported on <code>System.err</code> and otherwise
	     *    ignored.  Data that ends part way through the preamble or the font
	     *    table, or whose paragraph section is too short to be sliced, is
	     *    rejected with an <code>IOException</code>.</p>
	     *
	     * @param   docData InputStream containing a Pocket Word data file.
	     *
	     * @throws  IOException     In case of any I/O errors, or if the stream is
	     *                          null, truncated or malformed.
	     */
	    public void read(InputStream docData) throws IOException {

	        if (docData == null) {
	            throw new IOException ("No input stream to convert");
	        }

	        // The preamble may become important for font declarations.
	        int readValue = readFully(docData, preamble);
	        // #i33702# check for an empty InputStream.
	        if (readValue == 0) {
	            System.err.println("Error:invalid input stream");
	            return;
	        }
	        if (readValue < preamble.length) {
	            throw new IOException("Truncated Pocket Word data: incomplete preamble");
	        }

	        /*
	         * Font records follow the preamble.  The last record is flagged by
	         * the bytes 05 00 01 00 at offsets 76-79.  Every record must be read
	         * completely: a short read would leave stale bytes in the buffer and
	         * the terminating marker might then never be seen.
	         */
	        byte[] font = new byte[FONT_RECORD_LENGTH];
	        do {
	            if (readFully(docData, font) < font.length) {
	                throw new IOException(
	                    "Truncated Pocket Word data: last font marker not found");
	            }

	            String name = new String(font, 0, FONT_NAME_LENGTH, "UTF-16LE");
	            fonts.add(name.trim());

	        } while (!(font[76] == 5 && font[77] == 0
	                            && font[78] == 1 && font[79] == 0));

	        /*
	         * TODO:  The document descriptor data that follows the fonts ends with
	         *        a variable section containing data for each of the paragraphs.
	         *        It may be possible to use this information to calculate starting
	         *        positions for each paragraph rather than iterating through the
	         *        entire byte stream.
	         */

	        // Everything that remains: descriptor, paragraphs and trailer.
	        ByteArrayOutputStream bos = new ByteArrayOutputStream();
	        byte[] chunk = new byte[4096];
	        int count;
	        while ((count = docData.read(chunk)) != -1) {
	            bos.write(chunk, 0, count);
	        }

	        byte[] contentData = bos.toByteArray();
	        int start = 0;
	        boolean sawMarker = false;

	        /*
	         * Walk the content one 4-byte word at a time.  A word whose last two
	         * bytes are FF FF marks a paragraph; the paragraph itself begins
	         * 8 bytes before that word and runs up to 8 bytes before the next
	         * marker word.
	         */
	        for (int i = 0; i + 3 < contentData.length; i += 4) {
	            boolean marker = contentData[i + 2] == (byte)0xFF
	                                && contentData[i + 3] == (byte)0xFF;
	            if (!marker) {
	                continue;
	            }

	            int boundary = i - 8;
	            if (boundary < 0) {
	                throw new IOException(
	                    "Malformed Pocket Word data: paragraph marker at offset " + i);
	            }

	            if (sawMarker) {
	                // This marker closes the paragraph opened by the previous one.
	                paragraphs.add(
	                    new Paragraph(copyRange(contentData, start, boundary - start)));
	            }

	            // Every marker opens a paragraph.
	            start = boundary;
	            sawMarker = true;
	        }

	        /*
	         * Special case, the last paragraph: it is not followed by another
	         * marker, so it runs up to the trailer.  The trailer is preceded by
	         * two padding bytes when the byte 19 positions from the end is zero.
	         */
	        if (contentData.length < 19) {
	            throw new IOException(
	                "Malformed Pocket Word data: content too short for a trailer");
	        }
	        int tailLength = (contentData[contentData.length - 19] == 0) ? 20 : 18;
	        int lastLength = contentData.length - start - tailLength;
	        if (lastLength < 0) {
	            throw new IOException(
	                "Malformed Pocket Word data: last paragraph overlaps the trailer");
	        }
	        paragraphs.add(new Paragraph(copyRange(contentData, start, lastLength)));
	    }


	    /*
	     * Reads from the stream until the buffer is full or the stream ends.
	     * Returns the number of bytes stored, which is less than buf.length only
	     * if the end of the stream was reached first.
	     */
	    private static int readFully(InputStream in, byte[] buf) throws IOException {
	        int total = 0;
	        while (total < buf.length) {
	            int n = in.read(buf, total, buf.length - total);
	            if (n < 0) {
	                break;
	            }
	            total += n;
	        }
	        return total;
	    }


	    /*
	     * Returns a new array holding length bytes of src starting at offset.
	     */
	    private static byte[] copyRange(byte[] src, int offset, int length) {
	        byte[] copy = new byte[length];
	        System.arraycopy(src, offset, copy, 0, length);
	        return copy;
	    }


	    /*
	     * Utility method to make sure the document name is stripped of any file
	     * extensions before use.
	     *
	     * The extension is matched ignoring case with regionMatches(), which is
	     * independent of the default locale and never changes the length of the
	     * string being measured.
	     */
	    private String trimDocumentName(String name) {
	        int extLength = FILE_EXTENSION.length();
	        int endIndex = name.length() - extLength;

	        // A negative endIndex (name shorter than the extension) never matches.
	        if (name.regionMatches(true, endIndex, FILE_EXTENSION, 0, extLength)) {
	            // strip the extension
	            name = name.substring(0, endIndex);
	        }

	        return name;
	    }


	    /**
	     * <p>Method to provide access to all of the <code>Paragraph</code> objects
	     *    in the <code>Document</code>.</p>
	     *
	     * @return <code>Enumeration</code> over the paragraphs in the document.
	     */
	    public Enumeration getParagraphEnumeration() {
	        return paragraphs.elements();
	    }


	    /**
	     * <p>Returns the <code>Document</code> name with no file extension.</p>
	     *
	     * @return  The <code>Document</code> name with no file extension.
	     */
	    public String getName() {
	        return docName;
	    }


	    /**
	     * <p>Returns the <code>Document</code> name with file extension.</p>
	     *
	     * @return  The <code>Document</code> name with file extension.
	     */
	    public String getFileName() {
	        return docName + FILE_EXTENSION;
	    }


	    /**
	     * <p>Writes out the <code>Document</code> content to the specified
	     * <code>OutputStream</code>.</p>
	     *
	     * <p>The font table and the document descriptor are rebuilt on every
	     * call, so the method may be called more than once and after
	     * {@link #read(InputStream)}.  The stream is flushed and closed before
	     * the method returns.</p>
	     *
	     * <p>This method may not be thread-safe.
	     * Implementations may or may not synchronize this
	     * method.  User code (i.e. caller) must make sure that
	     * calls to this method are thread-safe.</p>
	     *
	     * @param  os  <code>OutputStream</code> to write out the
	     *             <code>Document</code> content.
	     *
	     * @throws  IOException  If any I/O error occurs.
	     */
	    public void write(OutputStream os) throws IOException {
	        DataOutputStream dos = new DataOutputStream(os);

	        initPreamble();
	        dos.write(preamble);

	        dos.write(loadFonts());

	        // Start from an empty descriptor, otherwise the entries added by an
	        // earlier write() would be emitted again.
	        descriptor = new DocumentDescriptor();
	        for (int i = 0; i < paragraphs.size(); i++) {
	            Paragraph para = (Paragraph)paragraphs.elementAt(i);
	            descriptor.addParagraph((short)para.getTextLength(), para.getLines());
	        }
	        dos.write(descriptor.getDescriptor());

	        for (int i = 0; i < paragraphs.size(); i++ ) {
	            Paragraph para = (Paragraph)paragraphs.elementAt(i);

	            // Last paragraph has some extra data
	            if (i + 1 == paragraphs.size()) {
	                para.setLastParagraph(true);
	            }
	            dos.write(para.getParagraphData());
	        }


	        /*
	         * Before we write out the trailer, we need to make sure that it will
	         * lead to the file ending on a 4 byte boundary.  The trailer is 18
	         * bytes long, so two bytes of padding are needed exactly when the
	         * data written so far is already aligned.
	         */
	        if (dos.size() % 4 == 0) {
	            dos.write((byte)0x00);
	            dos.write((byte)0x00);
	        }

	        dos.write(trailer);

	        dos.flush();
	        dos.close();
	    }


	    /**
	     * <p>This method adds a new paragraph element to the document.  No string
	     *    data is added to the paragraph.</p>
	     *
	     * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and
	     *    is used as the target for all subsequent calls to addParagraphData().</p>
	     *
	     * @param   style       Paragraph Style object describing the formatting for
	     *                      the new paragraph.  Can be null.
	     * @param   listElement true if this paragraph is to be bulleted;
	     *                      false otherwise.
	     */
	    public void addParagraph(ParaStyle style, boolean listElement) {
	        /* For the moment, only support basic text entry in a single paragraph */
	        Paragraph para = new Paragraph(style);

	        paragraphs.add(para);

	        pStyle = style;
	        currentPara = para;

	        if (listElement) {
	            para.setBullets(true);
	        }
	    }


	    /**
	     * <p>This method adds text to the current paragraph.</p>
	     *
	     * <p>If no paragraph has been added to the document yet, it creates one.</p>
	     *
	     * @param   data        The string data for this segment.
	     * @param   style       Text Style object describing the formatting of this
	     *                      segment.  Can be null.
	     */
	    public void addParagraphData(String data, TextStyle style) {
	        if (currentPara == null) {
	            addParagraph(null, false);
	        }
	        currentPara.addTextSegment(data, style);
	    }


	    /*
	     * Preamble is the portion before font specification which never
	     * seems to change from one file, or one saved version, to the next.
	     *
	     * Bytes 18h and 19h seem to contain the number of fonts and should
	     * be modified when all of the fonts have been specified.
	     * These bytes are the first two on the fourth line below.
	     */
	    private void initPreamble() {
	        preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
	                                0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
	                                0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00, // Bytes 3-4 Font??
	                                0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bytes 1-2 # Fonts
	                                0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
	                                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	                                0x00, 0x00, 0x00, 0x00 };
	    }


	    /*
	     * This method builds the minimum font data that is used by the converter
	     * and returns it as the bytes of the font table: two 80-byte records.
	     * Currently, all documents convert to 10 point Courier New.  Tahoma is
	     * always mentioned in Pocket Word files, however, even if it is not used.
	     *
	     * The table is built in a local buffer rather than in the fonts field,
	     * which holds the font names collected by read().
	     *
	     * TODO:    Rewrite to allow for multiple fonts once font support issues
	     *          have been resolved.
	     */
	    private byte[] loadFonts() throws IOException {
	        ByteArrayOutputStream fontData = new ByteArrayOutputStream();

	        fontData.write("Tahoma".getBytes("UTF-16LE"));
	        fontData.write(new byte[52]); // Rest of font name?
	        fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } );
	        fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } );
	        fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );
	        fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );

	        fontData.write("Courier New".getBytes("UTF-16LE"));
	        fontData.write(new byte[42]);
	        fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } );
	        fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } );
	        fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } );

	        // Next part indicates that this is the last font
	        fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } );

	        return fontData.toByteArray();
	    }
	}