/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
| |
| package org.apache.poi.hwpf.model; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.List; |
| |
| import org.apache.poi.util.IOUtils; |
| import org.apache.poi.util.Internal; |
| import org.apache.poi.util.LittleEndian; |
| |
| /** |
| * Represents a PAP FKP. The style properties for paragraph and character runs |
| * are stored in fkps. There are PAP fkps for paragraph properties and CHP fkps |
| * for character run properties. The first part of the fkp for both CHP and PAP |
| * fkps consists of an array of 4 byte int offsets in the main stream for that |
| * Paragraph's or Character run's text. The ending offset is the next |
| * value in the array. For example, if an fkp has X number of Paragraph's |
| * stored in it then there are (x + 1) 4 byte ints in the beginning array. The |
| * number X is determined by the last byte in a 512 byte fkp. |
| * |
| * CHP and PAP fkps also store the compressed styles(grpprl) that correspond to |
| * the offsets on the front of the fkp. The offset of the grpprls is determined |
| * differently for CHP fkps and PAP fkps. |
| */ |
| @Internal |
| public final class PAPFormattedDiskPage extends FormattedDiskPage { |
| private static final int BX_SIZE = 13; |
| private static final int FC_SIZE = 4; |
| |
| private ArrayList<PAPX> _papxList = new ArrayList<>(); |
| private ArrayList<PAPX> _overFlow; |
| |
| |
| |
| public PAPFormattedDiskPage() { } |
| |
| /** |
| * Creates a PAPFormattedDiskPage from a 512 byte array |
| */ |
| public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream, |
| int offset, CharIndexTranslator translator ) { |
| super( documentStream, offset ); |
| for ( int x = 0; x < _crun; x++ ) |
| { |
| int bytesStartAt = getStart( x ); |
| int bytesEndAt = getEnd( x ); |
| |
| // int charStartAt = translator.getCharIndex( bytesStartAt ); |
| // int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt |
| // ); |
| // PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ), |
| // getParagraphHeight( x ), dataStream ); |
| // _papxList.add( papx ); |
| |
| for ( int[] range : translator.getCharIndexRanges( bytesStartAt, |
| bytesEndAt ) ) |
| { |
| PAPX papx = new PAPX( range[0], range[1], getGrpprl( x ), |
| getParagraphHeight( x ), dataStream ); |
| _papxList.add( papx ); |
| } |
| } |
| _fkp = null; |
| } |
| |
| /** |
| * Fills the queue for writing. |
| * |
| * @param filler a List of PAPXs |
| */ |
| public void fill(List<PAPX> filler) |
| { |
| _papxList.addAll(filler); |
| } |
| |
| /** |
| * Used when writing out a Word docunment. This method is part of a sequence |
| * that is necessary because there is no easy and efficient way to |
| * determine the number PAPX's that will fit into one FKP. THe sequence is |
| * as follows: |
| * |
| * fill() |
| * toByteArray() |
| * getOverflow() |
| * |
| * @return The remaining PAPXs that didn't fit into this FKP. |
| */ |
| ArrayList<PAPX> getOverflow() |
| { |
| return _overFlow; |
| } |
| |
| /** |
| * Gets the PAPX at index. |
| * @param index The index to get the PAPX for. |
| * @return The PAPX at index. |
| */ |
| public PAPX getPAPX(int index) |
| { |
| return _papxList.get(index); |
| } |
| |
| public List<PAPX> getPAPXs() |
| { |
| return Collections.unmodifiableList( _papxList ); |
| } |
| |
| /** |
| * Gets the papx grpprl for the paragraph at index in this fkp. |
| * |
| * @param index The index of the papx to get. |
| * @return a papx grpprl. |
| */ |
| protected byte[] getGrpprl(int index) |
| { |
| int papxOffset = 2 * LittleEndian.getUByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE))); |
| int size = 2 * LittleEndian.getUByte(_fkp, _offset + papxOffset); |
| if(size == 0) { |
| size = 2 * LittleEndian.getUByte(_fkp, _offset + ++papxOffset); |
| } else { |
| size--; |
| } |
| |
| return IOUtils.safelyClone(_fkp, _offset + papxOffset + 1, size, 512); |
| } |
| |
| /** |
| * Creates a byte array representation of this data structure. Suitable for |
| * writing to a Word document. |
| * |
| * @param dataStream required if PAPX is too big to fit in FKP |
| * |
| * @return A byte array representing this data structure. |
| * @throws IOException |
| * if an I/O error occurs. |
| */ |
| protected byte[] toByteArray( ByteArrayOutputStream dataStream, |
| CharIndexTranslator translator ) throws IOException |
| { |
| byte[] buf = new byte[512]; |
| int size = _papxList.size(); |
| int grpprlOffset = 0; |
| int bxOffset = 0; |
| int fcOffset = 0; |
| byte[] lastGrpprl = new byte[0]; |
| |
| // total size is currently the size of one FC |
| int totalSize = FC_SIZE; |
| |
| int index = 0; |
| for ( ; index < size; index++ ) |
| { |
| byte[] grpprl = _papxList.get( index ).getGrpprl(); |
| int grpprlLength = grpprl.length; |
| |
| // is grpprl huge? |
| if ( grpprlLength > 488 ) |
| { |
| grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl |
| } |
| |
| // check to see if we have enough room for an FC, a BX, and the |
| // grpprl |
| // and the 1 byte size of the grpprl. |
| int addition = 0; |
| if ( !Arrays.equals( grpprl, lastGrpprl ) ) |
| { |
| addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 ); |
| } |
| else |
| { |
| addition = ( FC_SIZE + BX_SIZE ); |
| } |
| |
| totalSize += addition; |
| |
| // if size is uneven we will have to add one so the first grpprl |
| // falls |
| // on a word boundary |
| if ( totalSize > 511 + ( index % 2 ) ) |
| { |
| totalSize -= addition; |
| break; |
| } |
| |
| // grpprls must fall on word boundaries |
| if ( grpprlLength % 2 > 0 ) |
| { |
| totalSize += 1; |
| } |
| else |
| { |
| totalSize += 2; |
| } |
| lastGrpprl = grpprl; |
| } |
| |
| // see if we couldn't fit some |
| if ( index != size ) |
| { |
| _overFlow = new ArrayList<>(); |
| _overFlow.addAll( _papxList.subList( index, size ) ); |
| } |
| |
| // index should equal number of papxs that will be in this fkp now. |
| buf[511] = (byte) index; |
| |
| bxOffset = ( FC_SIZE * index ) + FC_SIZE; |
| grpprlOffset = 511; |
| |
| PAPX papx = null; |
| lastGrpprl = new byte[0]; |
| for ( int x = 0; x < index; x++ ) |
| { |
| papx = _papxList.get( x ); |
| byte[] phe = papx.getParagraphHeight().toByteArray(); |
| byte[] grpprl = papx.getGrpprl(); |
| |
| // is grpprl huge? |
| if ( grpprl.length > 488 ) |
| { |
| // if so do we have storage at getHugeGrpprlOffset() |
| // int hugeGrpprlOffset = papx.getHugeGrpprlOffset(); |
| // if ( hugeGrpprlOffset == -1 ) // then we have no storage... |
| // { |
| // throw new UnsupportedOperationException( |
| // "This Paragraph has no dataStream storage." ); |
| // } |
| // we have some storage... |
| |
| // get the size of the existing storage |
| // int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream, |
| // hugeGrpprlOffset ); |
| // |
| // if ( maxHugeGrpprlSize < grpprl.length - 2 ) |
| // { // grpprl.length-2 because we don't store the istd |
| // throw new UnsupportedOperationException( |
| // "This Paragraph's dataStream storage is too small." ); |
| // } |
| |
| // store grpprl at hugeGrpprlOffset |
| // grpprl.length-2 because we don't store the istd |
| // System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset + |
| // 2, |
| // grpprl.length - 2 ); |
| // LittleEndian.putUShort( dataStream, hugeGrpprlOffset, |
| // grpprl.length - 2 ); |
| |
| byte[] hugePapx = Arrays.copyOfRange(grpprl, 2, grpprl.length); |
| int dataStreamOffset = dataStream.size(); |
| dataStream.write( hugePapx ); |
| |
| // grpprl = grpprl containing only a sprmPHugePapx2 |
| int istd = LittleEndian.getUShort( grpprl, 0 ); |
| |
| grpprl = new byte[8]; |
| LittleEndian.putUShort( grpprl, 0, istd ); |
| LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2 |
| LittleEndian.putInt( grpprl, 4, dataStreamOffset ); |
| } |
| |
| boolean same = Arrays.equals( lastGrpprl, grpprl ); |
| if ( !same ) |
| { |
| grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) ); |
| grpprlOffset -= ( grpprlOffset % 2 ); |
| } |
| // LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() ); |
| LittleEndian.putInt( buf, fcOffset, |
| translator.getByteIndex( papx.getStart() ) ); |
| buf[bxOffset] = (byte) ( grpprlOffset / 2 ); |
| System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length ); |
| |
| /* |
| * refer to the section on PAPX in the spec. Places a size on the |
| * front of the PAPX. Has to do with how the grpprl stays on word |
| * boundaries. |
| */ |
| if ( !same ) |
| { |
| int copyOffset = grpprlOffset; |
| if ( ( grpprl.length % 2 ) > 0 ) |
| { |
| buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 ); |
| } |
| else |
| { |
| buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 ); |
| copyOffset++; |
| } |
| System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length ); |
| lastGrpprl = grpprl; |
| } |
| |
| bxOffset += BX_SIZE; |
| fcOffset += FC_SIZE; |
| |
| } |
| |
| if (papx != null) { |
| // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin); |
| LittleEndian.putInt(buf, fcOffset, translator.getByteIndex(papx.getEnd())); |
| } |
| return buf; |
| } |
| |
| /** |
| * Used to get the ParagraphHeight of a PAPX at a particular index. |
| * @param index |
| * @return The ParagraphHeight |
| */ |
| private ParagraphHeight getParagraphHeight(int index) |
| { |
| int pheOffset = _offset + 1 + (((_crun + 1) * 4) + (index * 13)); |
| |
| return new ParagraphHeight(_fkp, pheOffset); |
| } |
| } |