log4j/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java - poi - Git at Google

 /* ====================================================================
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License.  You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
 ==================================================================== */

 package org.apache.poi.hwpf.usermodel;

 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;

 import java.util.List;

 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.HWPFTestDataSamples;
 import org.apache.poi.hwpf.model.PAPX;
 import org.apache.poi.hwpf.model.StyleSheet;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

 /**
  * Tests to ensure that our ranges end up with
  *  the right text in them, and the right font/styling
  *  properties applied to them.
  */
 public final class TestRangeProperties {
     private static final char page_break = (char)12;

     private static final String u_page_1 =
         "This is a fairly simple word document, over two pages, with headers and footers.\r" +
         "The trick with this one is that it contains some Unicode based strings in it.\r" +
         "Firstly, some currency symbols:\r" +
         "\tGBP - \u00a3\r" +
         "\tEUR - \u20ac\r" +
         "Now, we\u2019ll have some French text, in bold and big:\r" +
         "\tMoli\u00e8re\r" +
         "And some normal French text:\r" +
         "\tL'Avare ou l'\u00c9cole du mensonge\r" +
         "That\u2019s it for page one\r"
     ;
     private static final String u_page_2 =
         "This is page two. Les Pr\u00e9cieuses ridicules. The end.\r"
     ;

     private static final String a_page_1 =
         "I am a test document\r" +
         "This is page 1\r" +
         "I am Calibri (Body) in font size 11\r"
     ;
     private static final String a_page_2 =
         "This is page two\r" +
         "It\u2019s Arial Black in 16 point\r" +
         "It\u2019s also in blue\r"
     ;

     private HWPFDocument u;
     private HWPFDocument a;

     @BeforeEach
     void setUp() {
         u = HWPFTestDataSamples.openSampleFile("HeaderFooterUnicode.doc");
         a = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
     }

     @Test
     void testAsciiTextParagraphs() {
         Range r = a.getRange();
         assertEquals(
                 a_page_1 +
                 page_break + "\r" +
                 a_page_2,
                 r.text()
         );

         assertEquals(1, r.numSections());
         assertEquals(1, a.getSectionTable().getSections().size());
         Section s = r.getSection(0);
         assertEquals(
                 a_page_1 +
                 page_break + "\r" +
                 a_page_2,
                 s.text()
         );

         // no PAP reconstruction
         // assertEquals( 7, r.numParagraphs() );
         // with PAP reconstructon
         assertEquals( 8, r.numParagraphs() );

         String[] p1_parts = a_page_1.split("\r");
         String[] p2_parts = a_page_2.split("\r");

         // Check paragraph contents
         assertEquals(
                 p1_parts[0] + "\r",
                 r.getParagraph(0).text()
         );
         assertEquals(
                 p1_parts[1] + "\r",
                 r.getParagraph(1).text()
         );
         assertEquals(
                 p1_parts[2] + "\r",
                 r.getParagraph(2).text()
         );

         // no PAPX reconstruction
         // assertEquals( page_break + "\r", r.getParagraph( 3 ).text() );
         // assertEquals( p2_parts[0] + "\r", r.getParagraph( 4 ).text() );
         // assertEquals( p2_parts[1] + "\r", r.getParagraph( 5 ).text() );
         // assertEquals( p2_parts[2] + "\r", r.getParagraph( 6 ).text() );

         // with PAPX reconstruction
         assertEquals( "" + page_break, r.getParagraph( 3 ).text() );
         assertEquals( "\r", r.getParagraph( 4 ).text() );
         assertEquals( p2_parts[0] + "\r", r.getParagraph( 5 ).text() );
         assertEquals( p2_parts[1] + "\r", r.getParagraph( 6 ).text() );
         assertEquals( p2_parts[2] + "\r", r.getParagraph( 7 ).text() );
     }

     @Test
     void testAsciiStyling() {
         Range r = a.getRange();

         Paragraph p1 = r.getParagraph(0);
         Paragraph p7 = r.getParagraph(6);

         assertEquals(1, p1.numCharacterRuns());
         assertEquals(1, p7.numCharacterRuns());

         CharacterRun c1 = p1.getCharacterRun(0);
         CharacterRun c7 = p7.getCharacterRun(0);

         assertEquals("Times New Roman", c1.getFontName()); // No Calibri
         assertEquals("Arial Black", c7.getFontName());
         assertEquals(22, c1.getFontSize());
         assertEquals(32, c7.getFontSize());

         // This document has 15 styles
         assertEquals(15, a.getStyleSheet().numStyles());

         // Ensure none of the paragraphs refer to one that isn't there,
         //  and none of their character runs either
         // Also check all use the default style
         StyleSheet ss = a.getStyleSheet();
         for(int i=0; i<a.getRange().numParagraphs(); i++) {
             Paragraph p = a.getRange().getParagraph(i);
             int styleIndex = p.getStyleIndex();
             assertTrue(styleIndex < 15);
             assertEquals("Normal", ss.getStyleDescription(styleIndex).getName());
         }
         for(int i=0; i<a.getRange().numCharacterRuns(); i++) {
             CharacterRun c = a.getRange().getCharacterRun(i);
             int styleIndex = c.getStyleIndex();
             assertTrue(styleIndex < 15);
             assertEquals("Normal", ss.getStyleDescription(styleIndex).getName());
         }
     }

     /**
      * Tests the raw definitions of the paragraphs of
      *  a unicode document
      */
     @Test
     void testUnicodeParagraphDefinitions() {
         Range r = u.getRange();
         String[] p1_parts = u_page_1.split("\r");

         assertEquals(
                 u_page_1 + page_break + "\r" + u_page_2,
                 r.text()
         );
         assertEquals(
                 408, r.text().length()
         );


         assertEquals(1, r.numSections());
         assertEquals(1, u.getSectionTable().getSections().size());
         Section s = r.getSection(0);
         assertEquals(
                 u_page_1 +
                 page_break + "\r" +
                 u_page_2,
                 s.text()
         );
         assertEquals(0, s.getStartOffset());
         assertEquals(408, s.getEndOffset());


         List<PAPX> pDefs = r._paragraphs;
         // no PAPX reconstruction
         // assertEquals(36, pDefs.size());
         // with PAPX reconstruction
         assertEquals( 36, pDefs.size() );

         // Check that the last paragraph ends where it should do
         assertEquals(531, u.getOverallRange().text().length());
         PAPX pLast = pDefs.get(34);
         assertEquals(530, pLast.getEnd());

         // Only care about the first few really though
         PAPX p0 = pDefs.get(0);
         PAPX p1 = pDefs.get(1);
         PAPX p2 = pDefs.get(2);
         PAPX p3 = pDefs.get(3);
         PAPX p4 = pDefs.get(4);

         // 5 paragraphs should get us to the end of our text
         assertTrue(p0.getStart() < 408);
         assertTrue(p0.getEnd() < 408);
         assertTrue(p1.getStart() < 408);
         assertTrue(p1.getEnd() < 408);
         assertTrue(p2.getStart() < 408);
         assertTrue(p2.getEnd() < 408);
         assertTrue(p3.getStart() < 408);
         assertTrue(p3.getEnd() < 408);
         assertTrue(p4.getStart() < 408);
         assertTrue(p4.getEnd() < 408);

         // Paragraphs should match with lines
         assertEquals(
                 0,
                 p0.getStart()
         );
         assertEquals(
                 p1_parts[0].length() + 1,
                 p0.getEnd()
         );

         assertEquals(
                 p1_parts[0].length() + 1,
                 p1.getStart()
         );
         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1,
                 p1.getEnd()
         );

         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1,
                 p2.getStart()
         );
         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1 +
                 p1_parts[2].length() + 1,
                 p2.getEnd()
         );
 	}

     /**
      * Tests the paragraph text of a unicode document
      */
     @Test
     void testUnicodeTextParagraphs() {
         Range r = u.getRange();
         assertEquals(
                 u_page_1 +
                 page_break + "\r" +
                 u_page_2,
                 r.text()
         );

         // without PAPX reconstruction
         // assertEquals( 12, r.numParagraphs() );
         // with PAPX reconstruction
         assertEquals( 13, r.numParagraphs() );
         String[] p1_parts = u_page_1.split("\r");
         String[] p2_parts = u_page_2.split("\r");

         // Check text all matches up properly
         assertEquals(p1_parts[0] + "\r", r.getParagraph(0).text());
         assertEquals(p1_parts[1] + "\r", r.getParagraph(1).text());
         assertEquals(p1_parts[2] + "\r", r.getParagraph(2).text());
         assertEquals(p1_parts[3] + "\r", r.getParagraph(3).text());
         assertEquals(p1_parts[4] + "\r", r.getParagraph(4).text());
         assertEquals(p1_parts[5] + "\r", r.getParagraph(5).text());
         assertEquals(p1_parts[6] + "\r", r.getParagraph(6).text());
         assertEquals(p1_parts[7] + "\r", r.getParagraph(7).text());
         assertEquals(p1_parts[8] + "\r", r.getParagraph(8).text());
         assertEquals(p1_parts[9] + "\r", r.getParagraph(9).text());
         // without PAPX reconstruction
         // assertEquals(page_break + "\r", r.getParagraph(10).text());
         // assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text());
         // with PAPX reconstruction
         assertEquals( page_break + "", r.getParagraph( 10 ).text() );
         assertEquals( "\r", r.getParagraph( 11 ).text() );
         assertEquals( p2_parts[0] + "\r", r.getParagraph( 12 ).text() );
     }

     @Test
     void testUnicodeStyling() {
         Range r = u.getRange();
         String[] p1_parts = u_page_1.split("\r");

         Paragraph p1 = r.getParagraph(0);
         Paragraph p7 = r.getParagraph(6);
         StyleSheet ss = r._doc.getStyleSheet();

         // Line ending in its own run each time!
         assertEquals(2, p1.numCharacterRuns());
         assertEquals(2, p7.numCharacterRuns());

         CharacterRun c1a = p1.getCharacterRun(0);
         CharacterRun c1b = p1.getCharacterRun(1);
         CharacterRun c7a = p7.getCharacterRun(0);
         CharacterRun c7b = p7.getCharacterRun(1);

         assertEquals("Times New Roman", c1a.getFontName()); // No Calibri
         assertEquals(22, c1a.getFontSize());

         assertEquals("Times New Roman", c1b.getFontName()); // No Calibri
         assertEquals(22, c1b.getFontSize());

         assertEquals("Times New Roman", c7a.getFontName());
         assertEquals(48, c7a.getFontSize());

         assertEquals("Times New Roman", c7b.getFontName());
         assertEquals(48, c7b.getFontSize());

         // All use the default base style
         assertEquals("Normal", ss.getStyleDescription(c1a.getStyleIndex()).getName());
         assertEquals("Normal", ss.getStyleDescription(c1b.getStyleIndex()).getName());
         assertEquals("Heading 1", ss.getStyleDescription(c7a.getStyleIndex()).getName());
         assertEquals("Heading 1", ss.getStyleDescription(c7b.getStyleIndex()).getName());

         // Now check where they crop up
         assertEquals(
                 0,
                 c1a.getStartOffset()
         );
         assertEquals(
                 p1_parts[0].length(),
                 c1a.getEndOffset()
         );

         assertEquals(
                 p1_parts[0].length(),
                 c1b.getStartOffset()
         );
         assertEquals(
                 p1_parts[0].length()+1,
                 c1b.getEndOffset()
         );

         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1 +
                 p1_parts[2].length() + 1 +
                 p1_parts[3].length() + 1 +
                 p1_parts[4].length() + 1 +
                 p1_parts[5].length() + 1,
                 c7a.getStartOffset()
         );
         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1 +
                 p1_parts[2].length() + 1 +
                 p1_parts[3].length() + 1 +
                 p1_parts[4].length() + 1 +
                 p1_parts[5].length() + 1 +
                 1,
                 c7a.getEndOffset()
         );

         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1 +
                 p1_parts[2].length() + 1 +
                 p1_parts[3].length() + 1 +
                 p1_parts[4].length() + 1 +
                 p1_parts[5].length() + 1 +
                 1,
                 c7b.getStartOffset()
         );
         assertEquals(
                 p1_parts[0].length() + 1 +
                 p1_parts[1].length() + 1 +
                 p1_parts[2].length() + 1 +
                 p1_parts[3].length() + 1 +
                 p1_parts[4].length() + 1 +
                 p1_parts[5].length() + 1 +
                 p1_parts[6].length() + 1,
                 c7b.getEndOffset()
         );

         // This document also has 22 styles
         assertEquals(22, ss.numStyles());

         // Ensure none of the paragraphs refer to one that isn't there,
         //  and none of their character runs either
         for(int i=0; i<r.numParagraphs(); i++) {
             Paragraph p = r.getParagraph(i);
             int styleIndex = p.getStyleIndex();
             assertTrue(styleIndex < 22);
             assertNotNull(ss.getStyleDescription(styleIndex).getName());
         }
         for(int i=0; i<r.numCharacterRuns(); i++) {
             CharacterRun c = r.getCharacterRun(i);
             int styleIndex = c.getStyleIndex();
             assertTrue(styleIndex < 22);
             assertNotNull(ss.getStyleDescription(styleIndex).getName());
         }
     }
 }
	/* ====================================================================
	Licensed to the Apache Software Foundation (ASF) under one or more
	contributor license agreements. See the NOTICE file distributed with
	this work for additional information regarding copyright ownership.
	The ASF licenses this file to You under the Apache License, Version 2.0
	(the "License"); you may not use this file except in compliance with
	the License. You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==================================================================== */

	package org.apache.poi.hwpf.usermodel;

	import static org.junit.jupiter.api.Assertions.assertEquals;
	import static org.junit.jupiter.api.Assertions.assertNotNull;
	import static org.junit.jupiter.api.Assertions.assertTrue;

	import java.util.List;

	import org.apache.poi.hwpf.HWPFDocument;
	import org.apache.poi.hwpf.HWPFTestDataSamples;
	import org.apache.poi.hwpf.model.PAPX;
	import org.apache.poi.hwpf.model.StyleSheet;
	import org.junit.jupiter.api.BeforeEach;
	import org.junit.jupiter.api.Test;

	/**
	* Tests to ensure that our ranges end up with
	* the right text in them, and the right font/styling
	* properties applied to them.
	*/
	public final class TestRangeProperties {
	private static final char page_break = (char)12;

	private static final String u_page_1 =
	"This is a fairly simple word document, over two pages, with headers and footers.\r" +
	"The trick with this one is that it contains some Unicode based strings in it.\r" +
	"Firstly, some currency symbols:\r" +
	"\tGBP - \u00a3\r" +
	"\tEUR - \u20ac\r" +
	"Now, we\u2019ll have some French text, in bold and big:\r" +
	"\tMoli\u00e8re\r" +
	"And some normal French text:\r" +
	"\tL'Avare ou l'\u00c9cole du mensonge\r" +
	"That\u2019s it for page one\r"
	;
	private static final String u_page_2 =
	"This is page two. Les Pr\u00e9cieuses ridicules. The end.\r"
	;

	private static final String a_page_1 =
	"I am a test document\r" +
	"This is page 1\r" +
	"I am Calibri (Body) in font size 11\r"
	;
	private static final String a_page_2 =
	"This is page two\r" +
	"It\u2019s Arial Black in 16 point\r" +
	"It\u2019s also in blue\r"
	;

	private HWPFDocument u;
	private HWPFDocument a;

	@BeforeEach
	void setUp() {
	u = HWPFTestDataSamples.openSampleFile("HeaderFooterUnicode.doc");
	a = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
	}

	@Test
	void testAsciiTextParagraphs() {
	Range r = a.getRange();
	assertEquals(
	a_page_1 +
	page_break + "\r" +
	a_page_2,
	r.text()
	);

	assertEquals(1, r.numSections());
	assertEquals(1, a.getSectionTable().getSections().size());
	Section s = r.getSection(0);
	assertEquals(
	a_page_1 +
	page_break + "\r" +
	a_page_2,
	s.text()
	);

	// no PAP reconstruction
	// assertEquals( 7, r.numParagraphs() );
	// with PAP reconstructon
	assertEquals( 8, r.numParagraphs() );

	String[] p1_parts = a_page_1.split("\r");
	String[] p2_parts = a_page_2.split("\r");

	// Check paragraph contents
	assertEquals(
	p1_parts[0] + "\r",
	r.getParagraph(0).text()
	);
	assertEquals(
	p1_parts[1] + "\r",
	r.getParagraph(1).text()
	);
	assertEquals(
	p1_parts[2] + "\r",
	r.getParagraph(2).text()
	);

	// no PAPX reconstruction
	// assertEquals( page_break + "\r", r.getParagraph( 3 ).text() );
	// assertEquals( p2_parts[0] + "\r", r.getParagraph( 4 ).text() );
	// assertEquals( p2_parts[1] + "\r", r.getParagraph( 5 ).text() );
	// assertEquals( p2_parts[2] + "\r", r.getParagraph( 6 ).text() );

	// with PAPX reconstruction
	assertEquals( "" + page_break, r.getParagraph( 3 ).text() );
	assertEquals( "\r", r.getParagraph( 4 ).text() );
	assertEquals( p2_parts[0] + "\r", r.getParagraph( 5 ).text() );
	assertEquals( p2_parts[1] + "\r", r.getParagraph( 6 ).text() );
	assertEquals( p2_parts[2] + "\r", r.getParagraph( 7 ).text() );
	}

	@Test
	void testAsciiStyling() {
	Range r = a.getRange();

	Paragraph p1 = r.getParagraph(0);
	Paragraph p7 = r.getParagraph(6);

	assertEquals(1, p1.numCharacterRuns());
	assertEquals(1, p7.numCharacterRuns());

	CharacterRun c1 = p1.getCharacterRun(0);
	CharacterRun c7 = p7.getCharacterRun(0);

	assertEquals("Times New Roman", c1.getFontName()); // No Calibri
	assertEquals("Arial Black", c7.getFontName());
	assertEquals(22, c1.getFontSize());
	assertEquals(32, c7.getFontSize());

	// This document has 15 styles
	assertEquals(15, a.getStyleSheet().numStyles());

	// Ensure none of the paragraphs refer to one that isn't there,
	// and none of their character runs either
	// Also check all use the default style
	StyleSheet ss = a.getStyleSheet();
	for(int i=0; i<a.getRange().numParagraphs(); i++) {
	Paragraph p = a.getRange().getParagraph(i);
	int styleIndex = p.getStyleIndex();
	assertTrue(styleIndex < 15);
	assertEquals("Normal", ss.getStyleDescription(styleIndex).getName());
	}
	for(int i=0; i<a.getRange().numCharacterRuns(); i++) {
	CharacterRun c = a.getRange().getCharacterRun(i);
	int styleIndex = c.getStyleIndex();
	assertTrue(styleIndex < 15);
	assertEquals("Normal", ss.getStyleDescription(styleIndex).getName());
	}
	}

	/**
	* Tests the raw definitions of the paragraphs of
	* a unicode document
	*/
	@Test
	void testUnicodeParagraphDefinitions() {
	Range r = u.getRange();
	String[] p1_parts = u_page_1.split("\r");

	assertEquals(
	u_page_1 + page_break + "\r" + u_page_2,
	r.text()
	);
	assertEquals(
	408, r.text().length()
	);


	assertEquals(1, r.numSections());
	assertEquals(1, u.getSectionTable().getSections().size());
	Section s = r.getSection(0);
	assertEquals(
	u_page_1 +
	page_break + "\r" +
	u_page_2,
	s.text()
	);
	assertEquals(0, s.getStartOffset());
	assertEquals(408, s.getEndOffset());


	List<PAPX> pDefs = r._paragraphs;
	// no PAPX reconstruction
	// assertEquals(36, pDefs.size());
	// with PAPX reconstruction
	assertEquals( 36, pDefs.size() );

	// Check that the last paragraph ends where it should do
	assertEquals(531, u.getOverallRange().text().length());
	PAPX pLast = pDefs.get(34);
	assertEquals(530, pLast.getEnd());

	// Only care about the first few really though
	PAPX p0 = pDefs.get(0);
	PAPX p1 = pDefs.get(1);
	PAPX p2 = pDefs.get(2);
	PAPX p3 = pDefs.get(3);
	PAPX p4 = pDefs.get(4);

	// 5 paragraphs should get us to the end of our text
	assertTrue(p0.getStart() < 408);
	assertTrue(p0.getEnd() < 408);
	assertTrue(p1.getStart() < 408);
	assertTrue(p1.getEnd() < 408);
	assertTrue(p2.getStart() < 408);
	assertTrue(p2.getEnd() < 408);
	assertTrue(p3.getStart() < 408);
	assertTrue(p3.getEnd() < 408);
	assertTrue(p4.getStart() < 408);
	assertTrue(p4.getEnd() < 408);

	// Paragraphs should match with lines
	assertEquals(
	0,
	p0.getStart()
	);
	assertEquals(
	p1_parts[0].length() + 1,
	p0.getEnd()
	);

	assertEquals(
	p1_parts[0].length() + 1,
	p1.getStart()
	);
	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1,
	p1.getEnd()
	);

	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1,
	p2.getStart()
	);
	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1 +
	p1_parts[2].length() + 1,
	p2.getEnd()
	);
	}

	/**
	* Tests the paragraph text of a unicode document
	*/
	@Test
	void testUnicodeTextParagraphs() {
	Range r = u.getRange();
	assertEquals(
	u_page_1 +
	page_break + "\r" +
	u_page_2,
	r.text()
	);

	// without PAPX reconstruction
	// assertEquals( 12, r.numParagraphs() );
	// with PAPX reconstruction
	assertEquals( 13, r.numParagraphs() );
	String[] p1_parts = u_page_1.split("\r");
	String[] p2_parts = u_page_2.split("\r");

	// Check text all matches up properly
	assertEquals(p1_parts[0] + "\r", r.getParagraph(0).text());
	assertEquals(p1_parts[1] + "\r", r.getParagraph(1).text());
	assertEquals(p1_parts[2] + "\r", r.getParagraph(2).text());
	assertEquals(p1_parts[3] + "\r", r.getParagraph(3).text());
	assertEquals(p1_parts[4] + "\r", r.getParagraph(4).text());
	assertEquals(p1_parts[5] + "\r", r.getParagraph(5).text());
	assertEquals(p1_parts[6] + "\r", r.getParagraph(6).text());
	assertEquals(p1_parts[7] + "\r", r.getParagraph(7).text());
	assertEquals(p1_parts[8] + "\r", r.getParagraph(8).text());
	assertEquals(p1_parts[9] + "\r", r.getParagraph(9).text());
	// without PAPX reconstruction
	// assertEquals(page_break + "\r", r.getParagraph(10).text());
	// assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text());
	// with PAPX reconstruction
	assertEquals( page_break + "", r.getParagraph( 10 ).text() );
	assertEquals( "\r", r.getParagraph( 11 ).text() );
	assertEquals( p2_parts[0] + "\r", r.getParagraph( 12 ).text() );
	}

	@Test
	void testUnicodeStyling() {
	Range r = u.getRange();
	String[] p1_parts = u_page_1.split("\r");

	Paragraph p1 = r.getParagraph(0);
	Paragraph p7 = r.getParagraph(6);
	StyleSheet ss = r._doc.getStyleSheet();

	// Line ending in its own run each time!
	assertEquals(2, p1.numCharacterRuns());
	assertEquals(2, p7.numCharacterRuns());

	CharacterRun c1a = p1.getCharacterRun(0);
	CharacterRun c1b = p1.getCharacterRun(1);
	CharacterRun c7a = p7.getCharacterRun(0);
	CharacterRun c7b = p7.getCharacterRun(1);

	assertEquals("Times New Roman", c1a.getFontName()); // No Calibri
	assertEquals(22, c1a.getFontSize());

	assertEquals("Times New Roman", c1b.getFontName()); // No Calibri
	assertEquals(22, c1b.getFontSize());

	assertEquals("Times New Roman", c7a.getFontName());
	assertEquals(48, c7a.getFontSize());

	assertEquals("Times New Roman", c7b.getFontName());
	assertEquals(48, c7b.getFontSize());

	// All use the default base style
	assertEquals("Normal", ss.getStyleDescription(c1a.getStyleIndex()).getName());
	assertEquals("Normal", ss.getStyleDescription(c1b.getStyleIndex()).getName());
	assertEquals("Heading 1", ss.getStyleDescription(c7a.getStyleIndex()).getName());
	assertEquals("Heading 1", ss.getStyleDescription(c7b.getStyleIndex()).getName());

	// Now check where they crop up
	assertEquals(
	0,
	c1a.getStartOffset()
	);
	assertEquals(
	p1_parts[0].length(),
	c1a.getEndOffset()
	);

	assertEquals(
	p1_parts[0].length(),
	c1b.getStartOffset()
	);
	assertEquals(
	p1_parts[0].length()+1,
	c1b.getEndOffset()
	);

	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1 +
	p1_parts[2].length() + 1 +
	p1_parts[3].length() + 1 +
	p1_parts[4].length() + 1 +
	p1_parts[5].length() + 1,
	c7a.getStartOffset()
	);
	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1 +
	p1_parts[2].length() + 1 +
	p1_parts[3].length() + 1 +
	p1_parts[4].length() + 1 +
	p1_parts[5].length() + 1 +
	1,
	c7a.getEndOffset()
	);

	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1 +
	p1_parts[2].length() + 1 +
	p1_parts[3].length() + 1 +
	p1_parts[4].length() + 1 +
	p1_parts[5].length() + 1 +
	1,
	c7b.getStartOffset()
	);
	assertEquals(
	p1_parts[0].length() + 1 +
	p1_parts[1].length() + 1 +
	p1_parts[2].length() + 1 +
	p1_parts[3].length() + 1 +
	p1_parts[4].length() + 1 +
	p1_parts[5].length() + 1 +
	p1_parts[6].length() + 1,
	c7b.getEndOffset()
	);

	// This document also has 22 styles
	assertEquals(22, ss.numStyles());

	// Ensure none of the paragraphs refer to one that isn't there,
	// and none of their character runs either
	for(int i=0; i<r.numParagraphs(); i++) {
	Paragraph p = r.getParagraph(i);
	int styleIndex = p.getStyleIndex();
	assertTrue(styleIndex < 22);
	assertNotNull(ss.getStyleDescription(styleIndex).getName());
	}
	for(int i=0; i<r.numCharacterRuns(); i++) {
	CharacterRun c = r.getCharacterRun(i);
	int styleIndex = c.getStyleIndex();
	assertTrue(styleIndex < 22);
	assertNotNull(ss.getStyleDescription(styleIndex).getName());
	}
	}
	}