poi/src/main/java/org/apache/poi/hssf/record/SSTRecord.java - poi - Git at Google

 /* ====================================================================
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
 ==================================================================== */

 package org.apache.poi.hssf.record;

 import java.util.Iterator;
 import java.util.Map;
 import java.util.function.Supplier;

 import org.apache.poi.hssf.record.common.UnicodeString;
 import org.apache.poi.hssf.record.cont.ContinuableRecord;
 import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
 import org.apache.poi.util.GenericRecordUtil;
 import org.apache.poi.util.IntMapper;

 /**
  * Static String Table Record (0x00FC)<p>
  *
  * This holds all the strings for LabelSSTRecords.
  *
  * @see org.apache.poi.hssf.record.LabelSSTRecord
  * @see org.apache.poi.hssf.record.ContinueRecord
  */
 public final class SSTRecord extends ContinuableRecord {
     public static final short sid = 0x00FC;

     /** Stand-in used by {@link #addString(UnicodeString)} when it is handed {@code null}. */
     private static final UnicodeString EMPTY_STRING = new UnicodeString("");

     /**
      * union of strings in the SST and EXTSST.
      * Incremented on every call to {@link #addString(UnicodeString)}.
      */
     private int field_1_num_strings;

     /**
      * according to docs ONLY SST.
      * Incremented only when {@link #addString(UnicodeString)} stores a string
      * that was not yet in the table.
      */
     private int field_2_num_unique_strings;

     /** The strings held by this record, addressed by index. */
     private final IntMapper<UnicodeString> field_3_strings;

     /** Deserializer bound to {@link #field_3_strings}; used when reading from a stream. */
     private final SSTDeserializer deserializer;

     /**
      * Offsets from the beginning of the SST record (even across continuations).
      * {@code null} until the record has been serialized.
      */
     private int[] bucketAbsoluteOffsets;
     /**
      * Offsets relative the start of the current SST or continue record.
      * {@code null} until the record has been serialized.
      */
     private int[] bucketRelativeOffsets;

     /**
      * Creates an empty string table with both counters set to zero.
      */
     public SSTRecord() {
         field_1_num_strings = 0;
         field_2_num_unique_strings = 0;
         field_3_strings = new IntMapper<>();
         deserializer = new SSTDeserializer(field_3_strings);
     }

     /**
      * Copy constructor.
      * <p>
      * The counters are copied, the string table is duplicated through
      * {@code IntMapper.copy()}, and the bucket offset arrays (when present)
      * are cloned so the copy does not share them with {@code other}.
      *
      * @param other the record to copy
      */
     public SSTRecord(SSTRecord other) {
         super(other);
         field_1_num_strings = other.field_1_num_strings;
         field_2_num_unique_strings = other.field_2_num_unique_strings;
         field_3_strings = other.field_3_strings.copy();
         deserializer = new SSTDeserializer(field_3_strings);
         bucketAbsoluteOffsets = (other.bucketAbsoluteOffsets == null) ? null : other.bucketAbsoluteOffsets.clone();
         bucketRelativeOffsets = (other.bucketRelativeOffsets == null) ? null : other.bucketRelativeOffsets.clone();
     }

     /**
      * Add a string.
      * <p>
      * The total string count is always incremented. The unique count is
      * incremented, and the string stored, only when the string is not
      * already in the table. A {@code null} argument is treated as the
      * empty string.
      *
      * @param string string to be added
      *
      * @return the index of that string in the table
      */
     public int addString(UnicodeString string) {
         field_1_num_strings++;
         UnicodeString ucs = (string == null) ? EMPTY_STRING : string;

         int index = field_3_strings.getIndex(ucs);
         if (index != -1) {
             return index;
         }

         // This is a new string -- we didn't see it among the
         // strings we've already collected. Its index is the current
         // table size, captured before the string is appended.
         int rval = field_3_strings.size();
         field_2_num_unique_strings++;
         SSTDeserializer.addToStringTable(field_3_strings, ucs);
         return rval;
     }

     /**
      * @return number of strings
      */
     public int getNumStrings() {
         return field_1_num_strings;
     }

     /**
      * @return number of unique strings
      */
     public int getNumUniqueStrings() {
         return field_2_num_unique_strings;
     }

     /**
      * Get a particular string by its index
      *
      * @param id index into the array of strings
      *
      * @return the desired string
      */
     public UnicodeString getString(int id) {
         return field_3_strings.get(id);
     }

     /**
      * @return the record id of this record type, {@link #sid}
      */
     @Override
     public short getSid() {
         return sid;
     }

     /**
      * Reads the record from the given stream.
      * <p>
      * Two ints are read first (total string count, then unique string
      * count), followed by the string data. The string data is
      * arranged as follows:
      * </p>
      * <pre>
      * short  string_length;   // length of string data
      * byte   string_flag;     // flag specifying special string
      *                         // handling
      * short  run_count;       // optional count of formatting runs
      * int    extend_length;   // optional extension length
      * char[] string_data;     // string data, can be byte[] or
      *                         // short[] (length of array is
      *                         // string_length)
      * int[]  formatting_runs; // optional formatting runs (length of
      *                         // array is run_count)
      * byte[] extension;       // optional extension (length of array
      *                         // is extend_length)
      * </pre>
      * <p>
      * The string_flag is bit mapped as follows:
      * </p>
      * <table>
      *   <caption>string_flag mapping</caption>
      *   <tr><th>Bit number</th><th>Meaning if 0</th><th>Meaning if 1</th></tr>
      *   <tr><td>0</td><td>string_data is byte[]</td><td>string_data is short[]</td></tr>
      *   <tr><td>1</td><td>Should always be 0</td><td>string_flag is defective</td></tr>
      *   <tr><td>2</td><td>extension is not included</td><td>extension is included</td></tr>
      *   <tr><td>3</td><td>formatting run data is not included</td><td>formatting run data is included</td></tr>
      *   <tr><td>4</td><td>Should always be 0</td><td>string_flag is defective</td></tr>
      *   <tr><td>5</td><td>Should always be 0</td><td>string_flag is defective</td></tr>
      *   <tr><td>6</td><td>Should always be 0</td><td>string_flag is defective</td></tr>
      *   <tr><td>7</td><td>Should always be 0</td><td>string_flag is defective</td></tr>
      * </table>
      * <p>
      * We can handle eating the overhead associated with bits 2 or 3
      * (or both) being set, but we have no idea what to do with the
      * associated data. The UnicodeString class can handle the byte[]
      * vs short[] nature of the actual string data
      *
      * @param in the RecordInputStream to read the record from
      */
     public SSTRecord(RecordInputStream in) {
         // The two counters come first in the record data.
         field_1_num_strings = in.readInt();
         field_2_num_unique_strings = in.readInt();
         field_3_strings = new IntMapper<>();

         deserializer = new SSTDeserializer(field_3_strings);
         // Bug 57456: some Excel Sheets send 0 as field=1, but have some random number in field_2,
         // we should not try to read the strings in this case.
         if (field_1_num_strings == 0) {
             field_2_num_unique_strings = 0;
             return;
         }
         deserializer.manufactureStrings(field_2_num_unique_strings, in);
     }

     /**
      * @return an iterator of the strings we hold. All instances are
      *         UnicodeStrings
      */
     Iterator<UnicodeString> getStrings() {
         return field_3_strings.iterator();
     }

     /**
      * @return count of the strings we hold.
      */
     int countStrings() {
         return field_3_strings.size();
     }

     /**
      * Writes this record through an {@code SSTSerializer} and remembers the
      * bucket offsets it reports, so that {@link #createExtSSTRecord(int)}
      * can be called afterwards.
      *
      * @param out the output to serialize into
      */
     @Override
     protected void serialize(ContinuableRecordOutput out) {
         SSTSerializer serializer = new SSTSerializer(field_3_strings, getNumStrings(), getNumUniqueStrings());
         serializer.serialize(out);
         bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
         bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
     }

     /**
      * Creates an extended string record based on the current contents of
      * the current SST record.  The offset within the stream to the SST record
      * is required because the extended string record points directly to the
      * strings in the SST record.
      * <p>
      * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
      *       SERIALIZED.
      *
      * @param sstOffset     The offset in the stream to the start of the
      *                      SST record.
      * @return  The new ExtSST record.
      * @throws IllegalStateException if this record has not been serialized yet
      */
     public ExtSSTRecord createExtSSTRecord(int sstOffset) {
         if (bucketAbsoluteOffsets == null || bucketRelativeOffsets == null) {
             throw new IllegalStateException("SST record has not yet been serialized.");
         }

         ExtSSTRecord extSST = new ExtSSTRecord();
         extSST.setNumStringsPerBucket((short) 8);
         // Work on copies so the offsets cached by serialize() stay relative
         // to the SST record and this method can be called more than once.
         int[] absoluteOffsets = bucketAbsoluteOffsets.clone();
         int[] relativeOffsets = bucketRelativeOffsets.clone();
         for (int i = 0; i < absoluteOffsets.length; i++) {
             absoluteOffsets[i] += sstOffset;
         }
         extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
         return extSST;
     }

     /**
      * Calculates the size in bytes of the EXTSST record as it would be if the
      * record was serialized.
      *
      * @return  The size of the ExtSST record in bytes.
      */
     public int calcExtSSTRecordSize() {
         return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
     }

     /**
      * @return a new record built with the copy constructor
      */
     @Override
     public SSTRecord copy() {
         return new SSTRecord(this);
     }

     @Override
     public HSSFRecordTypes getGenericRecordType() {
         return HSSFRecordTypes.SST;
     }

     /**
      * @return suppliers for the counters, the string elements and the two
      *         bucket offset arrays, keyed by property name
      */
     @Override
     public Map<String, Supplier<?>> getGenericProperties() {
         return GenericRecordUtil.getGenericProperties(
             "numStrings", this::getNumStrings,
             "numUniqueStrings", this::getNumUniqueStrings,
             "strings", () -> field_3_strings.getElements(),
             "bucketAbsoluteOffsets", () -> bucketAbsoluteOffsets,
             "bucketRelativeOffsets", () -> bucketRelativeOffsets
         );
     }
 }
	/* ====================================================================
	Licensed to the Apache Software Foundation (ASF) under one or more
	contributor license agreements. See the NOTICE file distributed with
	this work for additional information regarding copyright ownership.
	The ASF licenses this file to You under the Apache License, Version 2.0
	(the "License"); you may not use this file except in compliance with
	the License. You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==================================================================== */

	package org.apache.poi.hssf.record;

	import java.util.Iterator;
	import java.util.Map;
	import java.util.function.Supplier;

	import org.apache.poi.hssf.record.common.UnicodeString;
	import org.apache.poi.hssf.record.cont.ContinuableRecord;
	import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
	import org.apache.poi.util.GenericRecordUtil;
	import org.apache.poi.util.IntMapper;

	/**
	* Static String Table Record (0x00FC)<p>
	*
	* This holds all the strings for LabelSSTRecords.
	*
	* @see org.apache.poi.hssf.record.LabelSSTRecord
	* @see org.apache.poi.hssf.record.ContinueRecord
	*/
	public final class SSTRecord extends ContinuableRecord {
	public static final short sid = 0x00FC;

	private static final UnicodeString EMPTY_STRING = new UnicodeString("");

	/**
	* union of strings in the SST and EXTSST
	*/
	private int field_1_num_strings;

	/**
	* according to docs ONLY SST
	*/
	private int field_2_num_unique_strings;
	private IntMapper<UnicodeString> field_3_strings;

	private SSTDeserializer deserializer;

	/**
	* Offsets from the beginning of the SST record (even across continuations)
	*/
	private int[] bucketAbsoluteOffsets;
	/**
	* Offsets relative the start of the current SST or continue record
	*/
	private int[] bucketRelativeOffsets;

	public SSTRecord() {
	field_1_num_strings = 0;
	field_2_num_unique_strings = 0;
	field_3_strings = new IntMapper<>();
	deserializer = new SSTDeserializer(field_3_strings);
	}

	public SSTRecord(SSTRecord other) {
	super(other);
	field_1_num_strings = other.field_1_num_strings;
	field_2_num_unique_strings = other.field_2_num_unique_strings;
	field_3_strings = other.field_3_strings.copy();
	deserializer = new SSTDeserializer(field_3_strings);
	bucketAbsoluteOffsets = (other.bucketAbsoluteOffsets == null) ? null : other.bucketAbsoluteOffsets.clone();
	bucketRelativeOffsets = (other.bucketRelativeOffsets == null) ? null : other.bucketRelativeOffsets.clone();
	}

	/**
	* Add a string.
	*
	* @param string string to be added
	*
	* @return the index of that string in the table
	*/
	public int addString(UnicodeString string)
	{
	field_1_num_strings++;
	UnicodeString ucs = ( string == null ) ? EMPTY_STRING
	: string;
	int rval;
	int index = field_3_strings.getIndex(ucs);

	if ( index != -1 ) {
	rval = index;
	} else {
	// This is a new string -- we didn't see it among the
	// strings we've already collected
	rval = field_3_strings.size();
	field_2_num_unique_strings++;
	SSTDeserializer.addToStringTable( field_3_strings, ucs );
	}
	return rval;
	}

	/**
	* @return number of strings
	*/
	public int getNumStrings()
	{
	return field_1_num_strings;
	}

	/**
	* @return number of unique strings
	*/
	public int getNumUniqueStrings()
	{
	return field_2_num_unique_strings;
	}


	/**
	* Get a particular string by its index
	*
	* @param id index into the array of strings
	*
	* @return the desired string
	*/
	public UnicodeString getString(int id ) {
	return field_3_strings.get( id );
	}

	public short getSid() {
	return sid;
	}

	/**
	* Fill the fields from the data
	* <P>
	* The data consists of sets of string data. This string data is
	* arranged as follows:
	* </P>
	* <pre>
	* short string_length; // length of string data
	* byte string_flag; // flag specifying special string
	* // handling
	* short run_count; // optional count of formatting runs
	* int extend_length; // optional extension length
	* char[] string_data; // string data, can be byte[] or
	* // short[] (length of array is
	* // string_length)
	* int[] formatting_runs; // optional formatting runs (length of
	* // array is run_count)
	* byte[] extension; // optional extension (length of array
	* // is extend_length)
	* </pre>
	* <P>
	* The string_flag is bit mapped as follows:
	* </P>
	* <P>
	* <TABLE summary="string_flag mapping">
	* <TR>
	* <TH>Bit number</TH>
	* <TH>Meaning if 0</TH>
	* <TH>Meaning if 1</TH>
	* <TR>
	* <TR>
	* <TD>0</TD>
	* <TD>string_data is byte[]</TD>
	* <TD>string_data is short[]</TD>
	* <TR>
	* <TR>
	* <TD>1</TD>
	* <TD>Should always be 0</TD>
	* <TD>string_flag is defective</TD>
	* <TR>
	* <TR>
	* <TD>2</TD>
	* <TD>extension is not included</TD>
	* <TD>extension is included</TD>
	* <TR>
	* <TR>
	* <TD>3</TD>
	* <TD>formatting run data is not included</TD>
	* <TD>formatting run data is included</TD>
	* <TR>
	* <TR>
	* <TD>4</TD>
	* <TD>Should always be 0</TD>
	* <TD>string_flag is defective</TD>
	* <TR>
	* <TR>
	* <TD>5</TD>
	* <TD>Should always be 0</TD>
	* <TD>string_flag is defective</TD>
	* <TR>
	* <TR>
	* <TD>6</TD>
	* <TD>Should always be 0</TD>
	* <TD>string_flag is defective</TD>
	* <TR>
	* <TR>
	* <TD>7</TD>
	* <TD>Should always be 0</TD>
	* <TD>string_flag is defective</TD>
	* <TR>
	* </TABLE>
	* <P>
	* We can handle eating the overhead associated with bits 2 or 3
	* (or both) being set, but we have no idea what to do with the
	* associated data. The UnicodeString class can handle the byte[]
	* vs short[] nature of the actual string data
	*
	* @param in the RecordInputStream to read the record from
	*/
	public SSTRecord(RecordInputStream in) {
	// this method is ALWAYS called after construction -- using
	// the nontrivial constructor, of course -- so this is where
	// we initialize our fields
	field_1_num_strings = in.readInt();
	field_2_num_unique_strings = in.readInt();
	field_3_strings = new IntMapper<>();

	deserializer = new SSTDeserializer(field_3_strings);
	// Bug 57456: some Excel Sheets send 0 as field=1, but have some random number in field_2,
	// we should not try to read the strings in this case.
	if(field_1_num_strings == 0) {
	field_2_num_unique_strings = 0;
	return;
	}
	deserializer.manufactureStrings( field_2_num_unique_strings, in );
	}


	/**
	* @return an iterator of the strings we hold. All instances are
	* UnicodeStrings
	*/
	Iterator<UnicodeString> getStrings()
	{
	return field_3_strings.iterator();
	}

	/**
	* @return count of the strings we hold.
	*/
	int countStrings() {
	return field_3_strings.size();
	}

	protected void serialize(ContinuableRecordOutput out) {
	SSTSerializer serializer = new SSTSerializer(field_3_strings, getNumStrings(), getNumUniqueStrings() );
	serializer.serialize(out);
	bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
	bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
	}

	/**
	* Creates an extended string record based on the current contents of
	* the current SST record. The offset within the stream to the SST record
	* is required because the extended string record points directly to the
	* strings in the SST record.
	* <p>
	* NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
	* SERIALIZED.
	*
	* @param sstOffset The offset in the stream to the start of the
	* SST record.
	* @return The new SST record.
	*/
	public ExtSSTRecord createExtSSTRecord(int sstOffset) {
	if (bucketAbsoluteOffsets == null \|\| bucketRelativeOffsets == null) {
	throw new IllegalStateException("SST record has not yet been serialized.");
	}

	ExtSSTRecord extSST = new ExtSSTRecord();
	extSST.setNumStringsPerBucket((short)8);
	int[] absoluteOffsets = bucketAbsoluteOffsets.clone();
	int[] relativeOffsets = bucketRelativeOffsets.clone();
	for ( int i = 0; i < absoluteOffsets.length; i++ ) {
	absoluteOffsets[i] += sstOffset;
	}
	extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
	return extSST;
	}

	/**
	* Calculates the size in bytes of the EXTSST record as it would be if the
	* record was serialized.
	*
	* @return The size of the ExtSST record in bytes.
	*/
	public int calcExtSSTRecordSize() {
	return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
	}

	@Override
	public SSTRecord copy() {
	return new SSTRecord(this);
	}

	@Override
	public HSSFRecordTypes getGenericRecordType() {
	return HSSFRecordTypes.SST;
	}

	@Override
	public Map<String, Supplier<?>> getGenericProperties() {
	return GenericRecordUtil.getGenericProperties(
	"numStrings", this::getNumStrings,
	"numUniqueStrings", this::getNumUniqueStrings,
	"strings", () -> field_3_strings.getElements(),
	"bucketAbsoluteOffsets", () -> bucketAbsoluteOffsets,
	"bucketRelativeOffsets", () -> bucketRelativeOffsets
	);
	}
	}