tag r1807428 as 3.17
git-svn-id: https://svn.apache.org/repos/asf/poi/tags/REL_3_17_FINAL@1807429 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/java/org/apache/poi/ss/formula/FormulaParser.java b/src/java/org/apache/poi/ss/formula/FormulaParser.java
index 8d0653b..fd17aca 100644
--- a/src/java/org/apache/poi/ss/formula/FormulaParser.java
+++ b/src/java/org/apache/poi/ss/formula/FormulaParser.java
@@ -736,7 +736,7 @@
// Done reading from input stream
// Ok to return now
- if (isTotalsSpec && !tbl.isHasTotalsRow()) {
+ if (isTotalsSpec && tbl.getTotalsRowCount() == 0) {
return new ParseNode(ErrPtg.REF_INVALID);
}
if ((isThisRow || isThisRowSpec) && (_rowIndex < startRow || endRow < _rowIndex)) {
@@ -759,14 +759,14 @@
if (nSpecQuantifiers == 1 && isAllSpec) {
//do nothing
} else if (isDataSpec && isHeadersSpec) {
- if (tbl.isHasTotalsRow()) {
+ if (tbl.getTotalsRowCount() > 0) {
actualEndRow = endRow - 1;
}
} else if (isDataSpec && isTotalsSpec) {
actualStartRow = startRow + 1;
} else if (nSpecQuantifiers == 1 && isDataSpec) {
actualStartRow = startRow + 1;
- if (tbl.isHasTotalsRow()) {
+ if (tbl.getTotalsRowCount() > 0) {
actualEndRow = endRow - 1;
}
} else if (nSpecQuantifiers == 1 && isHeadersSpec) {
@@ -785,7 +785,7 @@
actualEndRow = _rowIndex;
} else { // Really no special quantifiers
actualStartRow++;
- if (tbl.isHasTotalsRow()) actualEndRow--;
+ if (tbl.getTotalsRowCount() > 0) actualEndRow--;
}
}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
index 911eef6..7452da8 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
@@ -33,6 +33,7 @@
import org.apache.poi.openxml4j.opc.PackageNamespaces;
import org.apache.poi.util.DocumentHelper;
+import org.apache.poi.util.Removal;
import org.apache.xmlbeans.SchemaType;
import org.apache.xmlbeans.SchemaTypeLoader;
import org.apache.xmlbeans.XmlBeans;
@@ -49,7 +50,7 @@
@SuppressWarnings("deprecation")
public class POIXMLTypeLoader {
- private static ThreadLocal<ClassLoader> classLoader = new ThreadLocal<ClassLoader>();
+ private static ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<SchemaTypeLoader>();
// TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
// These constants should be common to all of POI and easy to use by other applications such as Tika
@@ -109,20 +110,26 @@
* when the user code is finalized.
*
* @param cl the classloader to be used when XmlBeans classes and definitions are looked up
+ * @deprecated in POI 3.17 - setting a classloader from the outside is now obsolete,
+ * the classloader of the SchemaType will be used
*/
+ @Deprecated
+ @Removal(version="4.0")
public static void setClassLoader(ClassLoader cl) {
- classLoader.set(cl);
}
- private static SchemaTypeLoader getTypeLoader() {
- ClassLoader cl = classLoader.get();
- return (cl == null)
- ? XmlBeans.getContextTypeLoader()
- : XmlBeans.typeLoaderForClassLoader(cl);
+ private static SchemaTypeLoader getTypeLoader(SchemaType type) {
+ SchemaTypeLoader tl = typeLoader.get();
+ if (tl == null) {
+ ClassLoader cl = type.getClass().getClassLoader();
+ tl = XmlBeans.typeLoaderForClassLoader(cl);
+ typeLoader.set(tl);
+ }
+ return tl;
}
public static XmlObject newInstance(SchemaType type, XmlOptions options) {
- return getTypeLoader().newInstance(type, getXmlOptions(options));
+ return getTypeLoader(type).newInstance(type, getXmlOptions(options));
}
public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
@@ -154,34 +161,34 @@
public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
try {
Document doc = DocumentHelper.readDocument(jiois);
- return getTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options));
+ return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
} catch (SAXException e) {
throw new IOException("Unable to parse xml bean", e);
}
}
public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
- return getTypeLoader().parse(xsr, type, getXmlOptions(options));
+ return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
}
public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
try {
Document doc = DocumentHelper.readDocument(new InputSource(jior));
- return getTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options));
+ return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
} catch (SAXException e) {
throw new XmlException("Unable to parse xml bean", e);
}
}
public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
- return getTypeLoader().parse(node, type, getXmlOptions(options));
+ return getTypeLoader(type).parse(node, type, getXmlOptions(options));
}
public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
- return getTypeLoader().parse(xis, type, getXmlOptions(options));
+ return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
}
public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
- return getTypeLoader().newValidatingXMLInputStream(xis, type, getXmlOptions(options));
+ return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
}
}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
index 070f0c1..f029829 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
@@ -33,6 +33,7 @@
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFSDT;
import org.apache.poi.xwpf.usermodel.XWPFSDTCell;
import org.apache.poi.xwpf.usermodel.XWPFTable;
@@ -53,6 +54,7 @@
private XWPFDocument document;
private boolean fetchHyperlinks = false;
+ private boolean concatenatePhoneticRuns = true;
public XWPFWordExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
this(new XWPFDocument(container));
@@ -86,6 +88,14 @@
fetchHyperlinks = fetch;
}
+ /**
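+ * Sets whether the phonetic (ruby) text of a run is appended to its base text during extraction. Default is <code>true</code>.
+ * @param concatenatePhoneticRuns <code>true</code> to append each run's phonetic text after its base text, <code>false</code> to extract the base text only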
+ */
+ public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
+ this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+ }
+
public String getText() {
StringBuffer text = new StringBuffer();
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
@@ -130,7 +140,11 @@
for (IRunElement run : paragraph.getRuns()) {
- text.append(run);
+ if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
+ text.append(((XWPFRun)run).text());
+ } else {
+ text.append(run);
+ }
if (run instanceof XWPFHyperlinkRun && fetchHyperlinks) {
XWPFHyperlink link = ((XWPFHyperlinkRun) run).getHyperlink(document);
if (link != null)
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
index 85289c5..80b56c7 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
@@ -68,6 +68,8 @@
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRuby;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRubyContent;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedHpsMeasure;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedTwipsMeasure;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
@@ -1042,10 +1044,15 @@
}
/**
- * Returns the string version of the text
+ * Returns the text of this run, followed by the phonetic string in parentheses when that string is not empty
*/
public String toString() {
- return text();
+ String phonetic = getPhonetic();
+ if (phonetic.length() > 0) {
+ return text() +" ("+phonetic.toString()+")";
+ } else {
+ return text();
+ }
}
/**
@@ -1061,71 +1068,136 @@
c.selectPath("./*");
while (c.toNextSelection()) {
XmlObject o = c.getObject();
- if (o instanceof CTText) {
+ if (o instanceof CTRuby) {
+ handleRuby(o, text, false);
+ continue;
+ }
+ _getText(o, text);
+ }
+ c.dispose();
+ return text.toString();
+
+ }
+
+ /**
+ *
+ * @return the phonetic (ruby) string associated with this run or an empty String if none exists
+ */
+ public String getPhonetic() {
+ StringBuffer text = new StringBuffer();
+
+ // Collect the phonetic (rt) text of every ruby element in the run
+ // Do so in a way that preserves the ordering
+ XmlCursor c = run.newCursor();
+ c.selectPath("./*");
+ while (c.toNextSelection()) {
+ XmlObject o = c.getObject();
+ if (o instanceof CTRuby) {
+ handleRuby(o, text, true);
+ }
+ }
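+ // Any picture text? NOTE(review): this lands in the phonetic string, and text() no longer appends it - confirm this is intended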
+ if (pictureText != null && pictureText.length() > 0) {
+ text.append("\n").append(pictureText).append("\n");
+ }
+ c.dispose();
+ return text.toString();
+ }
+
+ /**
+ *
+ * @param rubyObj the ruby element (a CTRuby) whose content is read
+ * @param text buffer to which to append the content
+ * @param extractPhonetic <code>true</code> to extract the phonetic (rt) component, <code>false</code> to extract the base component
+ */
+ private void handleRuby(XmlObject rubyObj, StringBuffer text, boolean extractPhonetic) {
+ XmlCursor c = rubyObj.newCursor();
+
+ //according to the spec, a ruby object
+ //has the phonetic (rt) first, then the actual text (base)
+ //second.
+
+ c.selectPath(".//*");
+ boolean inRT = false;
+ boolean inBase = false;
+ while (c.toNextSelection()) {
+ XmlObject o = c.getObject();
+ if (o instanceof CTRubyContent) {
String tagName = o.getDomNode().getNodeName();
- // Field Codes (w:instrText, defined in spec sec. 17.16.23)
- // come up as instances of CTText, but we don't want them
- // in the normal text output
- if (!"w:instrText".equals(tagName)) {
- text.append(((CTText) o).getStringValue());
+ if ("w:rt".equals(tagName)) {
+ inRT = true;
+ } else if ("w:rubyBase".equals(tagName)) {
+ inRT = false;
+ inBase = true;
+ }
+ } else {
+ if (extractPhonetic && inRT) {
+ _getText(o, text);
+ } else if (! extractPhonetic && inBase) {
+ _getText(o, text);
}
}
+ }
+ c.dispose();
+ }
- // Complex type evaluation (currently only for extraction of check boxes)
- if (o instanceof CTFldChar) {
- CTFldChar ctfldChar = ((CTFldChar) o);
- if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
- if (ctfldChar.getFfData() != null) {
- for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
- if (checkBox.getDefault() != null && checkBox.getDefault().getVal() == STOnOff.X_1) {
- text.append("|X|");
- } else {
- text.append("|_|");
- }
+ private void _getText(XmlObject o, StringBuffer text) {
+
+ if (o instanceof CTText) {
+ String tagName = o.getDomNode().getNodeName();
+ // Field Codes (w:instrText, defined in spec sec. 17.16.23)
+ // come up as instances of CTText, but we don't want them
+ // in the normal text output
+ if (!"w:instrText".equals(tagName)) {
+ text.append(((CTText) o).getStringValue());
+ }
+ }
+
+ // Complex type evaluation (currently only for extraction of check boxes)
+ if (o instanceof CTFldChar) {
+ CTFldChar ctfldChar = ((CTFldChar) o);
+ if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
+ if (ctfldChar.getFfData() != null) {
+ for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
+ if (checkBox.getDefault() != null && checkBox.getDefault().getVal() == STOnOff.X_1) {
+ text.append("|X|");
+ } else {
+ text.append("|_|");
}
}
}
}
+ }
- if (o instanceof CTPTab) {
+ if (o instanceof CTPTab) {
+ text.append("\t");
+ }
+ if (o instanceof CTBr) {
+ text.append("\n");
+ }
+ if (o instanceof CTEmpty) {
+ // Some inline text elements get returned not as
+ // themselves, but as CTEmpty, owing to some odd
+ // definitions around line 5642 of the XSDs
+ // This bit works around it, and replicates the above
+ // rules for that case
+ String tagName = o.getDomNode().getNodeName();
+ if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
text.append("\t");
}
- if (o instanceof CTBr) {
+ if ("w:br".equals(tagName) || "br".equals(tagName)) {
text.append("\n");
}
- if (o instanceof CTEmpty) {
- // Some inline text elements get returned not as
- // themselves, but as CTEmpty, owing to some odd
- // definitions around line 5642 of the XSDs
- // This bit works around it, and replicates the above
- // rules for that case
- String tagName = o.getDomNode().getNodeName();
- if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
- text.append("\t");
- }
- if ("w:br".equals(tagName) || "br".equals(tagName)) {
- text.append("\n");
- }
- if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
- text.append("\n");
- }
- }
- if (o instanceof CTFtnEdnRef) {
- CTFtnEdnRef ftn = (CTFtnEdnRef) o;
- String footnoteRef = ftn.getDomNode().getLocalName().equals("footnoteReference") ?
- "[footnoteRef:" + ftn.getId().intValue() + "]" : "[endnoteRef:" + ftn.getId().intValue() + "]";
- text.append(footnoteRef);
+ if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
+ text.append("\n");
}
}
-
- c.dispose();
-
- // Any picture text?
- if (pictureText != null && pictureText.length() > 0) {
- text.append("\n").append(pictureText);
+ if (o instanceof CTFtnEdnRef) {
+ CTFtnEdnRef ftn = (CTFtnEdnRef) o;
+ String footnoteRef = ftn.getDomNode().getLocalName().equals("footnoteReference") ?
+ "[footnoteRef:" + ftn.getId().intValue() + "]" : "[endnoteRef:" + ftn.getId().intValue() + "]";
+ text.append(footnoteRef);
}
-
- return text.toString();
}
/**
diff --git a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
index 432e9f0..a636e20 100644
--- a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
+++ b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
@@ -28,6 +28,7 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.lang.Thread.UncaughtExceptionHandler;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -40,6 +41,7 @@
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.util.IOUtils;
import org.apache.poi.util.NullOutputStream;
import org.apache.poi.util.PackageHelper;
import org.apache.poi.util.TempFile;
@@ -321,38 +323,61 @@
}
}
- @Test(expected=IllegalStateException.class)
- public void testOSGIClassLoadingAsIs() throws IOException {
+ @Test
+ public void testOSGIClassLoading() {
+ // the schema type loader is cached per thread in POIXMLTypeLoader.
+ // So create a new Thread and change the context class loader (which would normally be used)
+ // to not contain the OOXML classes
+ Runnable run = new Runnable() {
+ public void run() {
+ InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
+ XMLSlideShow ppt = null;
+ try {
+ ppt = new XMLSlideShow(is);
+ ppt.getSlides().get(0).getShapes();
+ } catch (IOException e) {
+ fail("failed to load XMLSlideShow");
+ } finally {
+ IOUtils.closeQuietly(ppt);
+ IOUtils.closeQuietly(is);
+ }
+ }
+ };
+
Thread thread = Thread.currentThread();
ClassLoader cl = thread.getContextClassLoader();
- InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
- try {
- thread.setContextClassLoader(cl.getParent());
- XMLSlideShow ppt = new XMLSlideShow(is);
- ppt.getSlides().get(0).getShapes();
- ppt.close();
- } finally {
- thread.setContextClassLoader(cl);
- is.close();
+ UncaughtHandler uh = new UncaughtHandler();
+
+ // check schema type loading and check whether we run into an OOM
+ Thread ta[] = new Thread[30];
+ for (int j=0; j<10; j++) {
+ for (int i=0; i<ta.length; i++) {
+ ta[i] = new Thread(run);
+ ta[i].setContextClassLoader(cl.getParent());
+ ta[i].setUncaughtExceptionHandler(uh);
+ ta[i].start();
+ }
+ for (int i=0; i<ta.length; i++) {
+ try {
+ ta[i].join();
+ } catch (InterruptedException e) {
+ fail("failed to join thread");
+ }
+ }
}
+ assertFalse(uh.hasException());
}
-
- @Test
- public void testOSGIClassLoadingFixed() throws IOException {
- Thread thread = Thread.currentThread();
- ClassLoader cl = thread.getContextClassLoader();
- InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
- try {
- thread.setContextClassLoader(cl.getParent());
- POIXMLTypeLoader.setClassLoader(cl);
- XMLSlideShow ppt = new XMLSlideShow(is);
- ppt.getSlides().get(0).getShapes();
- ppt.close();
- } finally {
- thread.setContextClassLoader(cl);
- POIXMLTypeLoader.setClassLoader(null);
- is.close();
+ private static class UncaughtHandler implements UncaughtExceptionHandler {
+ Throwable e;
+
+ public synchronized void uncaughtException(Thread t, Throwable e) {
+ this.e = e;
+
+ }
+
+ public synchronized boolean hasException() {
+ return e != null;
}
}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java
index 3b1fdd3..5afad20 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java
@@ -694,4 +694,15 @@
assertSame(cell, same);
wb.close();
}
+
+ @Test
+ public void testBug61468() {
+ Workbook wb = XSSFTestDataSamples.openSampleWorkbook("simple-monthly-budget.xlsx");
+ FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();
+ Cell cell = wb.getSheetAt(0).getRow(8).getCell(4);
+ assertEquals(3750, cell.getNumericCellValue(), 0.001);
+
+ CellValue value = evaluator.evaluate(cell);
+ assertEquals(3750, value.getNumberValue(), 0.001);
+ }
}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java
index 6b916b1..06ef68a 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java
@@ -163,7 +163,7 @@
public void isHasTotalsRow() throws IOException {
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("StructuredReferences.xlsx");
XSSFTable table = wb.getTable("\\_Prime.1");
- assertFalse(table.isHasTotalsRow());
+ assertFalse(table.getTotalsRowCount() > 0);
wb.close();
}
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index b83b27d..b7d0b03 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -421,4 +421,16 @@
extractor.getText());
extractor.close();
}
+
+ public void testPhonetic() throws IOException {
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("61470.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+ //expect: baseText (phoneticText)
+ assertEquals("\u6771\u4EAC (\u3068\u3046\u304D\u3087\u3046)", extractor.getText().trim());
+ extractor.close();
+ extractor = new XWPFWordExtractor(doc);
+ extractor.setConcatenatePhoneticRuns(false);
+ assertEquals("\u6771\u4EAC", extractor.getText().trim());
+ }
+
}
diff --git a/test-data/document/61470.docx b/test-data/document/61470.docx
new file mode 100644
index 0000000..6fc1afe
--- /dev/null
+++ b/test-data/document/61470.docx
Binary files differ
diff --git a/test-data/spreadsheet/simple-monthly-budget.xlsx b/test-data/spreadsheet/simple-monthly-budget.xlsx
new file mode 100644
index 0000000..b8613cd
--- /dev/null
+++ b/test-data/spreadsheet/simple-monthly-budget.xlsx
Binary files differ