| /* ==================================================================== |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==================================================================== */ |
| |
| package org.apache.poi.xwpf.usermodel;
|
|
|
| import java.util.ArrayList;
|
| import java.util.List;
|
| import java.util.Map;
|
|
|
| import junit.framework.TestCase;
|
| import org.apache.poi.xwpf.XWPFTestDataSamples;
|
|
|
| public final class TestXWPFSDT extends TestCase {
|
|
|
| /**
|
| * Test simple tag and title extraction from SDT
|
| *
|
| * @throws Exception
|
| */
|
| public void testTagTitle() throws Exception {
|
| XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54849.docx");
|
| String tag = null;
|
| String title = null;
|
| List<AbstractXWPFSDT> sdts = extractAllSDTs(doc);
|
| for (AbstractXWPFSDT sdt : sdts) {
|
| if (sdt.getContent().toString().equals("Rich_text")) {
|
| tag = "MyTag";
|
| title = "MyTitle";
|
| break;
|
| }
|
|
|
| }
|
| assertEquals("controls size", 13, sdts.size());
|
|
|
| assertEquals("tag", "MyTag", tag); |
| assertEquals("title", "MyTitle", title); |
| }
|
|
|
|
|
| public void testGetSDTs() throws Exception {
|
| String[] contents = new String[]{
|
| "header_rich_text",
|
| "Rich_text",
|
| "Rich_text_pre_table\nRich_text_cell1\t\t\t\n\t\t\t\n\t\t\t\n\nRich_text_post_table", |
| "Plain_text_no_newlines", |
| "Plain_text_with_newlines1\nplain_text_with_newlines2", |
| "Watermelon", |
| "Dirt", |
| "4/16/2013", |
| "Rich_text_in_cell", |
| "rich_text_in_paragraph_in_cell", |
| "Footer_rich_text", |
| "Footnote_sdt", |
| "Endnote_sdt" |
| |
| }; |
| XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54849.docx"); |
| List<AbstractXWPFSDT> sdts = extractAllSDTs(doc); |
|
|
| assertEquals("number of sdts", contents.length, sdts.size());
|
|
|
| for (int i = 0; i < contents.length; i++) {
|
| AbstractXWPFSDT sdt = sdts.get(i);
|
| assertEquals(i + ": " + contents[i], contents[i], sdt.getContent().toString());
|
| }
|
| }
|
|
|
| /**
|
| * POI-54771 and TIKA-1317
|
| */
|
| public void testSDTAsCell() throws Exception { |
| //Bug54771a.docx and Bug54771b.docx test slightly |
| //different recursion patterns. Keep both! |
| XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54771a.docx");
|
| List<AbstractXWPFSDT> sdts = extractAllSDTs(doc);
|
| String text = sdts.get(0).getContent().getText();
|
| assertEquals(2, sdts.size());
|
| assertTrue(text.indexOf("Test") > -1);
|
|
|
| text = sdts.get(1).getContent().getText();
|
| assertTrue(text.indexOf("Test Subtitle") > -1); |
| assertTrue(text.indexOf("Test User") > -1); |
| assertTrue(text.indexOf("Test") < text.indexOf("Test Subtitle")); |
|
|
| doc = XWPFTestDataSamples.openSampleDocument("Bug54771b.docx");
|
| sdts = extractAllSDTs(doc);
|
| assertEquals(3, sdts.size());
|
| assertTrue(sdts.get(0).getContent().getText().indexOf("Test") > -1);
|
|
|
| assertTrue(sdts.get(1).getContent().getText().indexOf("Test Subtitle") > -1);
|
| assertTrue(sdts.get(2).getContent().getText().indexOf("Test User") > -1);
|
|
|
| }
|
|
|
| /**
|
| * POI-55142 and Tika 1130
|
| */
|
| public void testNewLinesBetweenRuns() throws Exception {
|
| XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug55142.docx");
|
| List<AbstractXWPFSDT> sdts = extractAllSDTs(doc);
|
| List<String> targs = new ArrayList<String>();
|
| //these test newlines and tabs in paragraphs/body elements
|
| targs.add("Rich-text1 abcdefghi");
|
| targs.add("Rich-text2 abcd\t\tefgh");
|
| targs.add("Rich-text3 abcd\nefg");
|
| targs.add("Rich-text4 abcdefg");
|
| targs.add("Rich-text5 abcdefg\nhijk");
|
| targs.add("Plain-text1 abcdefg");
|
| targs.add("Plain-text2 abcdefg\nhijk\nlmnop");
|
| //this tests consecutive runs within a cell (not a paragraph)
|
| //this test case was triggered by Tika-1130
|
| targs.add("sdt_incell2 abcdefg");
|
|
|
| for (int i = 0; i < sdts.size(); i++) {
|
| AbstractXWPFSDT sdt = sdts.get(i);
|
| assertEquals(targs.get(i), targs.get(i), sdt.getContent().getText());
|
| }
|
| }
|
|
|
| private List<AbstractXWPFSDT> extractAllSDTs(XWPFDocument doc) {
|
|
|
| List<AbstractXWPFSDT> sdts = new ArrayList<AbstractXWPFSDT>();
|
|
|
| List<XWPFHeader> headers = doc.getHeaderList();
|
| for (XWPFHeader header : headers) {
|
| sdts.addAll(extractSDTsFromBodyElements(header.getBodyElements()));
|
| }
|
| sdts.addAll(extractSDTsFromBodyElements(doc.getBodyElements()));
|
|
|
| List<XWPFFooter> footers = doc.getFooterList();
|
| for (XWPFFooter footer : footers) {
|
| sdts.addAll(extractSDTsFromBodyElements(footer.getBodyElements()));
|
| }
|
|
|
| for (XWPFFootnote footnote : doc.getFootnotes()) {
|
| sdts.addAll(extractSDTsFromBodyElements(footnote.getBodyElements()));
|
| }
|
| for (Map.Entry<Integer, XWPFFootnote> e : doc.endnotes.entrySet()) {
|
| sdts.addAll(extractSDTsFromBodyElements(e.getValue().getBodyElements()));
|
| }
|
| return sdts;
|
| }
|
|
|
| private List<AbstractXWPFSDT> extractSDTsFromBodyElements(List<IBodyElement> elements) {
|
| List<AbstractXWPFSDT> sdts = new ArrayList<AbstractXWPFSDT>();
|
| for (IBodyElement e : elements) {
|
| if (e instanceof XWPFSDT) {
|
| XWPFSDT sdt = (XWPFSDT) e;
|
| sdts.add(sdt);
|
| } else if (e instanceof XWPFParagraph) {
|
|
|
| XWPFParagraph p = (XWPFParagraph) e;
|
| for (IRunElement e2 : p.getIRuns()) {
|
| if (e2 instanceof XWPFSDT) {
|
| XWPFSDT sdt = (XWPFSDT) e2;
|
| sdts.add(sdt);
|
| }
|
| }
|
| } else if (e instanceof XWPFTable) {
|
| XWPFTable table = (XWPFTable) e;
|
| sdts.addAll(extractSDTsFromTable(table));
|
| }
|
| }
|
| return sdts; |
| } |
| |
| private List<AbstractXWPFSDT> extractSDTsFromTable(XWPFTable table) { |
| |
| List<AbstractXWPFSDT> sdts = new ArrayList<AbstractXWPFSDT>(); |
| for (XWPFTableRow r : table.getRows()) {
|
| for (ICell c : r.getTableICells()) {
|
| if (c instanceof XWPFSDTCell) {
|
| sdts.add((XWPFSDTCell) c);
|
| } else if (c instanceof XWPFTableCell) {
|
| sdts.addAll(extractSDTsFromBodyElements(((XWPFTableCell) c).getBodyElements()));
|
| }
|
| }
|
| }
|
| return sdts; |
| } |
| } |