blob: b2206b86b7a1c9fb48686240fce70b7d65bba9ca [file] [log] [blame]
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.common.UnicodeString;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.storage.RawDataUtil;
import org.apache.poi.util.HexRead;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LocaleUtil;
import org.junit.jupiter.api.Test;
final class TestSSTRecord {
/**
 * Decodes each named hex dump sample file line by line and appends the decoded
 * bytes to one buffer, in the order in which the file names are given.
 *
 * @param hexDumpFileNames names of sample files in the hssf test data directory
 * @return the bytes decoded from all the files, concatenated
 * @throws IOException if reading a sample file or writing to the buffer fails
 */
private static byte[] concatHexDumps(String... hexDumpFileNames) throws IOException {
    // initial capacity only: 8228 bytes reserved per file
    ByteArrayOutputStream decoded = new ByteArrayOutputStream(hexDumpFileNames.length * 8228);
    for (String fileName : hexDumpFileNames) {
        try (InputStream sample = HSSFTestDataSamples.openSampleFileStream(fileName)) {
            BufferedReader hexLines =
                    new BufferedReader(new InputStreamReader(sample, LocaleUtil.CHARSET_1252));
            // every text line is decoded on its own and appended to the result
            for (String hexLine = hexLines.readLine(); hexLine != null; hexLine = hexLines.readLine()) {
                decoded.write(HexRead.readFromString(hexLine));
            }
        }
    }
    return decoded.toByteArray();
}
/**
 * Reads one {@link SSTRecord} from the given bytes and asserts that the input
 * was consumed completely.
 *
 * @param rawData serialization of one {@link SSTRecord} and zero or more {@link ContinueRecord}s
 * @return the record that was read from {@code rawData}
 */
private static SSTRecord createSSTFromRawData(byte[] rawData) {
    ByteArrayInputStream rawStream = new ByteArrayInputStream(rawData);
    RecordInputStream recordStream = new RecordInputStream(rawStream);
    recordStream.nextRecord();
    SSTRecord sst = new SSTRecord(recordStream);
    // nothing may be left over: no unread bytes and no further record
    assertEquals(0, recordStream.remaining());
    assertFalse(recordStream.hasNextRecord());
    return sst;
}
/**
 * SST is often split over several {@link ContinueRecord}s. Two sample dumps are
 * read: the first must re-serialize to exactly the original bytes, the second
 * must survive a serialize / re-read round trip with equal content.
 */
@Test
void testContinuedRecord() throws IOException {
    // sample 1: an SST record followed by one continuation dump
    byte[] smallData = concatHexDumps("BigSSTRecord", "BigSSTRecordCR");
    SSTRecord smallRecord = createSSTFromRawData(smallData);
    assertEquals(1464, smallRecord.getNumStrings());
    assertEquals(688, smallRecord.getNumUniqueStrings());
    assertEquals(688, smallRecord.countStrings());
    assertArrayEquals(smallData, smallRecord.serialize());

    // sample 2: testing based on new bug report
    byte[] bigData = concatHexDumps(
            "BigSSTRecord2", "BigSSTRecord2CR1", "BigSSTRecord2CR2", "BigSSTRecord2CR3",
            "BigSSTRecord2CR4", "BigSSTRecord2CR5", "BigSSTRecord2CR6", "BigSSTRecord2CR7");
    SSTRecord bigRecord = createSSTFromRawData(bigData);
    assertEquals(158642, bigRecord.getNumStrings());
    assertEquals(5249, bigRecord.getNumUniqueStrings());
    assertEquals(5249, bigRecord.countStrings());

    byte[] reserialized = bigRecord.serialize();
    // Debugging aid: overwrite a byte of the serialized form here
    // (e.g. reserialized[11000] = 'X') to observe that the comparison below
    // really detects differences.
    SSTRecord reparsed = createSSTFromRawData(reserialized);
    assertRecordEquals(bigRecord, reparsed);

    // TODO - the serialized bytes are not compared against bigData because of
    // trivial differences in ContinueRecord break locations.
    // Sample data should be checked against what most recent Excel version produces.
    // maybe tweaks are required in ContinuableRecordOutput
}
/**
 * Test capability of handling mondo big strings: four strings large enough that
 * the serialized form spans several records are added, serialized and read
 * back. This is done once with strings built from single bytes and once with
 * strings built from characters starting at {@code \u2122}.
 */
@Test
void testHugeStrings() {
    SSTRecord sst = new SSTRecord();
    byte[][] rawChars = {
        new byte[9000], new byte[7433], new byte[9002], new byte[16998]
    };
    UnicodeString[] expected = new UnicodeString[rawChars.length];

    // ---- pass 1: each string consists of one repeated letter ('a', 'b', ...) ----
    int expectedSize = 0;
    for (int i = 0; i < rawChars.length; i++) {
        byte[] chars = rawChars[i];
        Arrays.fill(chars, (byte) ('a' + i));
        expected[i] = new UnicodeString(new String(chars, LocaleUtil.CHARSET_1252));
        sst.addString(expected[i]);
        // the test expects 3 bytes of overhead per string plus one byte per character
        expectedSize += 3 + chars.length;
    }
    // add overhead of SST record (8), of broken strings (4)
    // and of six records (4 bytes each)
    expectedSize += 8 + 4 + (6 * 4);

    byte[] serialized = new byte[sst.getRecordSize()];
    sst.serialize(0, serialized);
    assertEquals(expectedSize, serialized.length);

    // Deserialize the record.
    RecordInputStream recordStream = new RecordInputStream(new ByteArrayInputStream(serialized));
    recordStream.nextRecord();
    sst = new SSTRecord(recordStream);
    assertEquals(expected.length, sst.getNumStrings());
    assertEquals(expected.length, sst.getNumUniqueStrings());
    assertEquals(expected.length, sst.countStrings());
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], sst.getString(i));
    }

    // ---- pass 2: the second buffer shrinks by one byte; buffers of even length ----
    // ---- become strings of half as many chars taken from \u2122 upwards       ----
    sst = new SSTRecord();
    rawChars[1] = new byte[rawChars[1].length - 1];
    for (int i = 0; i < rawChars.length; i++) {
        byte[] chars = rawChars[i];
        String text;
        if (chars.length % 2 == 0) {
            char[] wideChars = new char[chars.length / 2];
            Arrays.fill(wideChars, (char) ('\u2122' + i));
            text = new String(wideChars);
        } else {
            Arrays.fill(chars, (byte) ('a' + i));
            text = new String(chars, LocaleUtil.CHARSET_1252);
        }
        expected[i] = new UnicodeString(text);
        sst.addString(expected[i]);
    }

    serialized = new byte[sst.getRecordSize()];
    sst.serialize(0, serialized);
    // one byte less than in pass 1 is expected
    expectedSize--;
    assertEquals(expectedSize, serialized.length);

    recordStream = new RecordInputStream(new ByteArrayInputStream(serialized));
    recordStream.nextRecord();
    sst = new SSTRecord(recordStream);
    assertEquals(expected.length, sst.getNumStrings());
    assertEquals(expected.length, sst.getNumUniqueStrings());
    assertEquals(expected.length, sst.countStrings());
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], sst.getString(i));
    }
}
/**
 * test SSTRecord boundary conditions: a pattern of strings is written that
 * fills records exactly, then the serialized layout is verified.
 */
@Test
void testSSTRecordBug() {
    SSTRecord sst = new SSTRecord();

    // the record will start with two integers, then this string
    // ... that will eat up 16 of the 8224 bytes that the record
    // can hold
    sst.addString(new UnicodeString("Hello"));

    // now we have an additional 8208 bytes, which is an exact
    // multiple of 16 bytes; fill up with 2000 distinct 13-digit numbers
    final long firstValue = 1000000000000L;
    for (int i = 0; i < 2000; i++) {
        sst.addString(new UnicodeString(String.valueOf(firstValue + i)));
    }

    byte[] content = new byte[sst.getRecordSize()];
    sst.serialize(0, content);

    final int dataSize = 8224;   // data bytes one record can hold
    final int recordSpan = 8228; // distance between the starts of consecutive records

    // the leading SST record is completely full
    assertEquals(dataSize, LittleEndian.getShort(content, 2));

    // three Continue records are expected to follow, one every recordSpan bytes
    for (int n = 1; n <= 3; n++) {
        int start = n * recordSpan;
        assertEquals(ContinueRecord.sid, LittleEndian.getShort(content, start));
        if (n < 3) {
            // only the first two continuations are checked for being full
            assertEquals(dataSize, LittleEndian.getShort(content, start + 2));
        }
        // first data byte of the continuation; 13 matches the number of digits of
        // the test values (presumably the length of a string starting right here)
        assertEquals((byte) 13, content[start + 4]);
    }
}
/**
 * test simple addString: adding a string twice must only raise the total
 * count, while adding a new string also raises the unique count.
 */
@Test
void testSimpleAddString() {
    SSTRecord sst = new SSTRecord();
    UnicodeString plain = new UnicodeString("Hello world");
    // \u2122 is the encoding of the trademark symbol ...
    UnicodeString withTrademark = new UnicodeString("Hello world\u2122");

    // first occurrence of the plain string
    assertEquals(0, sst.addString(plain));
    assertEquals(plain, sst.getString(0));
    assertEquals(1, sst.countStrings());
    assertEquals(1, sst.getNumStrings());
    assertEquals(1, sst.getNumUniqueStrings());

    // same string again: same index, only the total number of strings grows
    assertEquals(0, sst.addString(plain));
    assertEquals(plain, sst.getString(0));
    assertEquals(1, sst.countStrings());
    assertEquals(2, sst.getNumStrings());
    assertEquals(1, sst.getNumUniqueStrings());

    // a different string gets the next index
    assertEquals(1, sst.addString(withTrademark));
    assertEquals(withTrademark, sst.getString(1));
    assertEquals(2, sst.countStrings());
    assertEquals(3, sst.getNumStrings());
    assertEquals(2, sst.getNumUniqueStrings());

    // every stored string must be one of the two, with the expected option flags
    for (Iterator<UnicodeString> stored = sst.getStrings(); stored.hasNext(); ) {
        UnicodeString current = stored.next();
        if (current.equals(plain)) {
            assertEquals((byte) 0, current.getOptionFlags());
        } else if (current.equals(withTrademark)) {
            assertEquals((byte) 1, current.getOptionFlags());
        } else {
            fail("cannot match string: " + current.getString());
        }
    }
}
/**
 * test simple constructor: a fresh record holds no strings and serializes to
 * twelve bytes.
 */
@Test
void testSimpleConstructor() {
    SSTRecord sst = new SSTRecord();
    assertEquals(0, sst.getNumStrings());
    assertEquals(0, sst.getNumUniqueStrings());
    assertEquals(0, sst.countStrings());

    byte[] actual = sst.serialize();

    int sid = sst.getSid();
    byte[] expected = new byte[12];   // all bytes not set below stay zero
    expected[0] = (byte) sid;         // sid, low byte first
    expected[1] = (byte) (sid >> 8);
    expected[2] = (byte) 8;           // followed by 8, then nine zero bytes
    assertArrayEquals(expected, actual);
}
/**
 * Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
 */
@Test
void testReadWriteDuplicatedRichText1() throws Exception {
    try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("duprich1.xls")) {
        HSSFSheet sheet = wb.getSheetAt(1);
        // column 8 of the first two rows holds the same text
        for (int rowIndex = 0; rowIndex < 2; rowIndex++) {
            assertEquals("01/05 (Wed)", sheet.getRow(rowIndex).getCell(8).getStringCellValue());
        }
        HSSFTestDataSamples.writeOutAndReadBack(wb).close();
    }

    // test the second file.
    try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("duprich2.xls")) {
        HSSFSheet sheet = wb.getSheetAt(0);
        // expected content of column 0, one entry per row starting at row 0
        String[] expectedFirstColumn = {
            "Testing", "rich", "text", "strings", "Testing", "Testing"
        };
        for (int rowIndex = 0; rowIndex < expectedFirstColumn.length; rowIndex++) {
            assertEquals(expectedFirstColumn[rowIndex],
                    sheet.getRow(rowIndex).getCell(0).getStringCellValue());
        }
        HSSFTestDataSamples.writeOutAndReadBack(wb).close();
    }
}
/**
 * deep comparison of two SST records: the three counters are compared first,
 * then the strings pairwise in iteration order.
 *
 * @param expected the record providing the expected values
 * @param actual the record under test
 */
private static void assertRecordEquals(SSTRecord expected, SSTRecord actual){
    assertEquals(expected.getNumStrings(), actual.getNumStrings(), "number of strings");
    assertEquals(expected.getNumUniqueStrings(), actual.getNumUniqueStrings(), "number of unique strings");
    assertEquals(expected.countStrings(), actual.countStrings(), "count of strings");

    // the loop is driven by the expected record's iterator only
    for (Iterator<UnicodeString> wanted = expected.getStrings(), found = actual.getStrings();
            wanted.hasNext(); ) {
        assertEquals(wanted.next(), found.next());
    }
}
/**
 * Reads an SST record from compressed sample data, serializes it and reads the
 * result again; both records must report 81 strings and be equal.
 */
@Test
void test50779_1() throws IOException {
    String compressedSample = "H4sIAAAAAAAAAL3aO2gVQRQG4HO5XkEZ8AESi4DBKoXFP+8IVioSvahcLxKC" +
"iRIRc9FEUZExCL4IYnyQqIi9SEATFUHRTkSQaBFsrEQLG8EiFgGrCM6uIGiff5c5e3ZY9l8W9mt2FqSjoyEixTBSW5kPm7EV29CNHa" +
"hjJ3ajgSb2oge92Id+HMAADuEwWjiCoxjCMI7hOE7iNM7gLM7hPC7gIi5hFJdxBWO4imu4jhsYxy3cBqTYtVRESU28/NnqMFJZ/Fgj" +
"lBjLiXGcGM+JCZyYyInp4sRsZMTo8sshxGhODEUBzVFAcxTQHAU0RwHNUUBzFNAcBQxHAcNRwHAUMBwFDEcBw1HAcBQwHAUMRwHDUc" +
"ByFLAcBSxHActRwHIUsBwFLEcBy1HAchSwHAUcRwHHUcBxFHAcBRxHAcdRwHEUcBwFHEcBx1HAcxTwHAU8RwHPUcBzFPAcBTxHAc9R" +
"wHMU8BwFAkeBwFEgcBQIHAUCR4HAUSBwFAgcBQJHgcBRIHIUiBwFIkeByFEgchSIHAUiR4HIUSBmBdZJ7aWItDXTcHNiz925lkoP+u" +
"oHVRof+dmnUrXVOajSQs/YKZVODE7v+jWxpbl9VKX9I929n/tVSndmb6pUkWfl//Tl5ZN/whtM4T7eYRL38BBf8R1PMZ9nfuBt2X3E" +
"E7zAl7KfwWs8Lrvn+YpXf2cn8Qjfcp3ZJI1KvuZDOT+F95jO9yn6opstu+IvflWW5lEVVR5XybJc2/JZVdplRa7rZXWunbIm1w2yVp" +
"bkN9yee9Kyg5gp/HfZAW3FQ1ce/694+A14Ha5/eSEAAA==";
    byte[] original = RawDataUtil.decompress(compressedSample);

    // first read: the sample data itself
    RecordInputStream sampleStream = TestcaseRecordInputStream.create(original);
    assertEquals(SSTRecord.sid, sampleStream.getSid());
    SSTRecord src = new SSTRecord(sampleStream);
    assertEquals(81, src.getNumStrings());

    // second read: what the first record serializes to
    RecordInputStream roundTripStream = TestcaseRecordInputStream.create(src.serialize());
    assertEquals(SSTRecord.sid, roundTripStream.getSid());
    SSTRecord dst = new SSTRecord(roundTripStream);
    assertEquals(81, dst.getNumStrings());

    assertRecordEquals(src, dst);
}
/**
 * Same round trip as {@code test50779_1}, with a second set of compressed
 * sample data: read, serialize, read again, compare.
 */
@Test
void test50779_2() throws IOException {
    String compressedSample = "H4sIAAAAAAAAAL3Uu2sVQRjG4e9wOKKw4gUkKSyCVQqLd2d2ZhOwiiLqQSUe" +
"JIgRUcQc1ERUZBXBG0GMFxIiaC8S8I6FWIoIop2NlWBhI1hoIVhFcM6Cgn+Avx3m2+HbXd5hYJ9FGxgYNbPedNYY0SZt1hZtU1vbtV" +
"Oj6mi3xrRHe7VP+3VAh3RYXR3RUR3TpKZ0XCd1Wmd0Tud1QRd1SZc1rSu6qhld03Xd0E3Nal63JOuNnIlxTIxnYgomJjAxkYkpmZgh" +
"JmaYiMnrPweIQRTIGQVyRoGcUSBnFMgZBXJGgZxRIGcUcIwCjlHAMQo4RgHHKOAYBRyjgGMUcIwCjlHAMwp4RgHPKOAZBTyjgGcU8I" +
"wCnlHAMwp4RoGCUaBgFCgYBQpGgYJRoGAUKBgFCkaBglGgYBQIjAKBUSAwCgRGgcAoEBgFAqNAYBQIjAKBUSAyCkRGgcgoEBkFIqNA" +
"ZBSIjAKRUSAyCkRGgZJRoGQUKBkFSkaBklGgZBQoGQVKRoEyKdBvrdtm1tepJjtzu+5862bV/fH2wayaPftzPKua3cGJrFocmzmVVS" +
"cmHu34Nbexs3U6qxo2b6105kttfRof9VoPdU/vtKC7eqDP+qpn+pE63/WmXn3QU73Qp3r9Vq/0pF49T2+8/Ntd0GN9SbX3/H3dSxuz" +
"pi1Js2lZfV9ly1Lt22DLG6nTtLW2ItV1tjrVQVuTqrPWyvTZ/z+7YettoXcIfy4oeijNf6Pb+g0SIvVzNSEAAA==";
    byte[] original = RawDataUtil.decompress(compressedSample);

    // first read: the sample data itself
    RecordInputStream sampleStream = TestcaseRecordInputStream.create(original);
    assertEquals(SSTRecord.sid, sampleStream.getSid());
    SSTRecord src = new SSTRecord(sampleStream);
    assertEquals(81, src.getNumStrings());

    // second read: what the first record serializes to
    RecordInputStream roundTripStream = TestcaseRecordInputStream.create(src.serialize());
    assertEquals(SSTRecord.sid, roundTripStream.getSid());
    SSTRecord dst = new SSTRecord(roundTripStream);
    assertEquals(81, dst.getNumStrings());

    assertRecordEquals(src, dst);
}
/**
 * Reads an SST record with eight data bytes, of which the first four are zero
 * and the last four are not, and expects both string counts to be zero.
 */
@Test
void test57456() {
    byte[] rawRecord = HexRead.readFromString("FC, 00, 08, 00, 00, 00, 00, 00, E1, 06, 00, 00");
    RecordInputStream recordStream = TestcaseRecordInputStream.create(rawRecord);
    assertEquals(SSTRecord.sid, recordStream.getSid());

    SSTRecord sst = new SSTRecord(recordStream);
    assertEquals(0, sst.getNumStrings());
    assertEquals(0, sst.getNumUniqueStrings());
}
}