tag r1807428 as 3.17

git-svn-id: https://svn.apache.org/repos/asf/poi/tags/REL_3_17_FINAL@1807429 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/java/org/apache/poi/ss/formula/FormulaParser.java b/src/java/org/apache/poi/ss/formula/FormulaParser.java
index 8d0653b..fd17aca 100644
--- a/src/java/org/apache/poi/ss/formula/FormulaParser.java
+++ b/src/java/org/apache/poi/ss/formula/FormulaParser.java
@@ -736,7 +736,7 @@
         // Done reading from input stream
         // Ok to return now
 
-        if (isTotalsSpec && !tbl.isHasTotalsRow()) {
+        if (isTotalsSpec && tbl.getTotalsRowCount() == 0) {
             return new ParseNode(ErrPtg.REF_INVALID);
         }
         if ((isThisRow || isThisRowSpec) && (_rowIndex < startRow || endRow < _rowIndex)) {
@@ -759,14 +759,14 @@
             if (nSpecQuantifiers == 1 && isAllSpec) {
                 //do nothing
             } else if (isDataSpec && isHeadersSpec) {
-                if (tbl.isHasTotalsRow()) {
+                if (tbl.getTotalsRowCount() > 0) {
                     actualEndRow = endRow - 1;
                 }
             } else if (isDataSpec && isTotalsSpec) {
                 actualStartRow = startRow + 1;
             } else if (nSpecQuantifiers == 1 && isDataSpec) {
                 actualStartRow = startRow + 1;
-                if (tbl.isHasTotalsRow()) {
+                if (tbl.getTotalsRowCount() > 0) {
                     actualEndRow = endRow - 1;
                 }
             } else if (nSpecQuantifiers == 1 && isHeadersSpec) {
@@ -785,7 +785,7 @@
                 actualEndRow = _rowIndex; 
             } else { // Really no special quantifiers
                 actualStartRow++;
-                if (tbl.isHasTotalsRow()) actualEndRow--;
+                if (tbl.getTotalsRowCount() > 0) actualEndRow--;
             }
         }
 
diff --git a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
index 911eef6..7452da8 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLTypeLoader.java
@@ -33,6 +33,7 @@
 
 import org.apache.poi.openxml4j.opc.PackageNamespaces;
 import org.apache.poi.util.DocumentHelper;
+import org.apache.poi.util.Removal;
 import org.apache.xmlbeans.SchemaType;
 import org.apache.xmlbeans.SchemaTypeLoader;
 import org.apache.xmlbeans.XmlBeans;
@@ -49,7 +50,7 @@
 @SuppressWarnings("deprecation")
 public class POIXMLTypeLoader {
 
-    private static ThreadLocal<ClassLoader> classLoader = new ThreadLocal<ClassLoader>();
+    private static ThreadLocal<SchemaTypeLoader> typeLoader = new ThreadLocal<SchemaTypeLoader>();
 
     // TODO: Do these have a good home like o.a.p.openxml4j.opc.PackageNamespaces and PackageRelationshipTypes?
     // These constants should be common to all of POI and easy to use by other applications such as Tika
@@ -109,20 +110,26 @@
      * when the user code is finalized.
      * 
      * @param cl the classloader to be used when XmlBeans classes and definitions are looked up
+     * @deprecated in POI 3.17 - setting a classloader from the outside is now obsolete and this call has no effect;
+     *  the classloader of the SchemaType will be used instead
      */
+    @Deprecated
+    @Removal(version="4.0")
     public static void setClassLoader(ClassLoader cl) {
-        classLoader.set(cl);
     }
     
-    private static SchemaTypeLoader getTypeLoader() {
-        ClassLoader cl = classLoader.get();
-        return (cl == null)
-            ? XmlBeans.getContextTypeLoader()
-            : XmlBeans.typeLoaderForClassLoader(cl);
+    private static SchemaTypeLoader getTypeLoader(SchemaType type) {
+        SchemaTypeLoader tl = typeLoader.get();
+        if (tl == null) {
+            ClassLoader cl = type.getClass().getClassLoader();
+            tl = XmlBeans.typeLoaderForClassLoader(cl);
+            typeLoader.set(tl);
+        }
+        return tl;
     }
     
     public static XmlObject newInstance(SchemaType type, XmlOptions options) {
-        return getTypeLoader().newInstance(type, getXmlOptions(options));
+        return getTypeLoader(type).newInstance(type, getXmlOptions(options));
     }
 
     public static XmlObject parse(String xmlText, SchemaType type, XmlOptions options) throws XmlException {
@@ -154,34 +161,34 @@
     public static XmlObject parse(InputStream jiois, SchemaType type, XmlOptions options) throws XmlException, IOException {
         try {
             Document doc = DocumentHelper.readDocument(jiois);
-            return getTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options));
+            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
         } catch (SAXException e) {
             throw new IOException("Unable to parse xml bean", e);
         }
     }
 
     public static XmlObject parse(XMLStreamReader xsr, SchemaType type, XmlOptions options) throws XmlException {
-        return getTypeLoader().parse(xsr, type, getXmlOptions(options));
+        return getTypeLoader(type).parse(xsr, type, getXmlOptions(options));
     }
 
     public static XmlObject parse(Reader jior, SchemaType type, XmlOptions options) throws XmlException, IOException {
         try {
             Document doc = DocumentHelper.readDocument(new InputSource(jior));
-            return getTypeLoader().parse(doc.getDocumentElement(), type, getXmlOptions(options));
+            return getTypeLoader(type).parse(doc.getDocumentElement(), type, getXmlOptions(options));
         } catch (SAXException e) {
             throw new XmlException("Unable to parse xml bean", e);
         }
     }
 
     public static XmlObject parse(Node node, SchemaType type, XmlOptions options) throws XmlException {
-        return getTypeLoader().parse(node, type, getXmlOptions(options));
+        return getTypeLoader(type).parse(node, type, getXmlOptions(options));
     }
 
     public static XmlObject parse(XMLInputStream xis, SchemaType type, XmlOptions options) throws XmlException, XMLStreamException {
-        return getTypeLoader().parse(xis, type, getXmlOptions(options));
+        return getTypeLoader(type).parse(xis, type, getXmlOptions(options));
     }
     
     public static XMLInputStream newValidatingXMLInputStream ( XMLInputStream xis, SchemaType type, XmlOptions options ) throws XmlException, XMLStreamException {
-        return getTypeLoader().newValidatingXMLInputStream(xis, type, getXmlOptions(options));
+        return getTypeLoader(type).newValidatingXMLInputStream(xis, type, getXmlOptions(options));
     }
 }
diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
index 070f0c1..f029829 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
@@ -33,6 +33,7 @@
 import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.apache.poi.xwpf.usermodel.XWPFRun;
 import org.apache.poi.xwpf.usermodel.XWPFSDT;
 import org.apache.poi.xwpf.usermodel.XWPFSDTCell;
 import org.apache.poi.xwpf.usermodel.XWPFTable;
@@ -53,6 +54,7 @@
 
     private XWPFDocument document;
     private boolean fetchHyperlinks = false;
+    private boolean concatenatePhoneticRuns = true;
 
     public XWPFWordExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
         this(new XWPFDocument(container));
@@ -86,6 +88,14 @@
         fetchHyperlinks = fetch;
     }
 
+    /**
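+     * Sets whether the phonetic (ruby) string of a run is concatenated onto its text during extraction.  Default is <code>true</code>
+     * @param concatenatePhoneticRuns <code>false</code> to extract only the text of each {@link XWPFRun}, without its phonetic string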
+     */
+    public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
+        this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+    }
+
     public String getText() {
         StringBuffer text = new StringBuffer();
         XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
@@ -130,7 +140,11 @@
 
 
         for (IRunElement run : paragraph.getRuns()) {
-            text.append(run);
+            if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
+                text.append(((XWPFRun)run).text());
+            } else {
+                text.append(run);
+            }
             if (run instanceof XWPFHyperlinkRun && fetchHyperlinks) {
                 XWPFHyperlink link = ((XWPFHyperlinkRun) run).getHyperlink(document);
                 if (link != null)
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
index 85289c5..80b56c7 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRun.java
@@ -68,6 +68,8 @@
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRuby;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRubyContent;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedHpsMeasure;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedTwipsMeasure;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
@@ -1042,10 +1044,15 @@
     }
 
     /**
-     * Returns the string version of the text
+     * Returns the string version of the text, followed by the phonetic string in parentheses if there is one
      */
     public String toString() {
-        return text();
+        String phonetic = getPhonetic();
+        if (phonetic.length() > 0) {
+            return text() +" ("+phonetic.toString()+")";
+        } else {
+            return text();
+        }
     }
 
     /**
@@ -1061,71 +1068,136 @@
         c.selectPath("./*");
         while (c.toNextSelection()) {
             XmlObject o = c.getObject();
-            if (o instanceof CTText) {
+            if (o instanceof CTRuby) {
+                handleRuby(o, text, false);
+                continue;
+            }
+            _getText(o, text);
+        }
+        c.dispose();
+        return text.toString();
+
+    }
+
+    /**
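+     * Collects the phonetic (ruby) text of this run; any picture text is appended after it, wrapped in newlines.
+     * @return the phonetic (ruby) string associated with this run, plus any picture text, or an empty String if neither exists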
+     */
+    public String getPhonetic() {
+        StringBuffer text = new StringBuffer();
+
+        // Grab the phonetic text of every ruby element in the run
+        // Do so in a way that preserves the ordering
+        XmlCursor c = run.newCursor();
+        c.selectPath("./*");
+        while (c.toNextSelection()) {
+            XmlObject o = c.getObject();
+            if (o instanceof CTRuby) {
+                handleRuby(o, text, true);
+            }
+        }
+        // Any picture text?
+        if (pictureText != null && pictureText.length() > 0) {
+            text.append("\n").append(pictureText).append("\n");
+        }
+        c.dispose();
+        return text.toString();
+    }
+
+    /**
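+     * Appends either the phonetic or the base text of a ruby element to the given buffer.
+     * @param rubyObj the ruby element whose content is read
+     * @param text buffer to which to append the content
+     * @param extractPhonetic if <code>true</code>, extract the phonetic (rt) component; otherwise extract the base (rubyBase) component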
+     */
+    private void handleRuby(XmlObject rubyObj, StringBuffer text, boolean extractPhonetic) {
+        XmlCursor c = rubyObj.newCursor();
+
+        //according to the spec, a ruby object
+        //has the phonetic (rt) first, then the actual text (base)
+        //second.
+
+        c.selectPath(".//*");
+        boolean inRT = false;
+        boolean inBase = false;
+        while (c.toNextSelection()) {
+            XmlObject o = c.getObject();
+            if (o instanceof CTRubyContent) {
                 String tagName = o.getDomNode().getNodeName();
-                // Field Codes (w:instrText, defined in spec sec. 17.16.23)
-                //  come up as instances of CTText, but we don't want them
-                //  in the normal text output
-                if (!"w:instrText".equals(tagName)) {
-                    text.append(((CTText) o).getStringValue());
+                if ("w:rt".equals(tagName)) {
+                    inRT = true;
+                } else if ("w:rubyBase".equals(tagName)) {
+                    inRT = false;
+                    inBase = true;
+                }
+            } else {
+                if (extractPhonetic && inRT) {
+                    _getText(o, text);
+                } else if (! extractPhonetic && inBase) {
+                    _getText(o, text);
                 }
             }
+        }
+        c.dispose();
+    }
 
-            // Complex type evaluation (currently only for extraction of check boxes)
-            if (o instanceof CTFldChar) {
-                CTFldChar ctfldChar = ((CTFldChar) o);
-                if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
-                    if (ctfldChar.getFfData() != null) {
-                        for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
-                            if (checkBox.getDefault() != null && checkBox.getDefault().getVal() == STOnOff.X_1) {
-                                text.append("|X|");
-                            } else {
-                                text.append("|_|");
-                            }
+    private void _getText(XmlObject o, StringBuffer text) {
+
+        if (o instanceof CTText) {
+            String tagName = o.getDomNode().getNodeName();
+            // Field Codes (w:instrText, defined in spec sec. 17.16.23)
+            //  come up as instances of CTText, but we don't want them
+            //  in the normal text output
+            if (!"w:instrText".equals(tagName)) {
+                text.append(((CTText) o).getStringValue());
+            }
+        }
+
+        // Complex type evaluation (currently only for extraction of check boxes)
+        if (o instanceof CTFldChar) {
+            CTFldChar ctfldChar = ((CTFldChar) o);
+            if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
+                if (ctfldChar.getFfData() != null) {
+                    for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
+                        if (checkBox.getDefault() != null && checkBox.getDefault().getVal() == STOnOff.X_1) {
+                            text.append("|X|");
+                        } else {
+                            text.append("|_|");
                         }
                     }
                 }
             }
+        }
 
-            if (o instanceof CTPTab) {
+        if (o instanceof CTPTab) {
+            text.append("\t");
+        }
+        if (o instanceof CTBr) {
+            text.append("\n");
+        }
+        if (o instanceof CTEmpty) {
+            // Some inline text elements get returned not as
+            //  themselves, but as CTEmpty, owing to some odd
+            //  definitions around line 5642 of the XSDs
+            // This bit works around it, and replicates the above
+            //  rules for that case
+            String tagName = o.getDomNode().getNodeName();
+            if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
                 text.append("\t");
             }
-            if (o instanceof CTBr) {
+            if ("w:br".equals(tagName) || "br".equals(tagName)) {
                 text.append("\n");
             }
-            if (o instanceof CTEmpty) {
-                // Some inline text elements get returned not as
-                //  themselves, but as CTEmpty, owing to some odd
-                //  definitions around line 5642 of the XSDs
-                // This bit works around it, and replicates the above
-                //  rules for that case
-                String tagName = o.getDomNode().getNodeName();
-                if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
-                    text.append("\t");
-                }
-                if ("w:br".equals(tagName) || "br".equals(tagName)) {
-                    text.append("\n");
-                }
-                if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
-                    text.append("\n");
-                }
-            }
-            if (o instanceof CTFtnEdnRef) {
-                CTFtnEdnRef ftn = (CTFtnEdnRef) o;
-                String footnoteRef = ftn.getDomNode().getLocalName().equals("footnoteReference") ?
-                        "[footnoteRef:" + ftn.getId().intValue() + "]" : "[endnoteRef:" + ftn.getId().intValue() + "]";
-                text.append(footnoteRef);
+            if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
+                text.append("\n");
             }
         }
-
-        c.dispose();
-
-        // Any picture text?
-        if (pictureText != null && pictureText.length() > 0) {
-            text.append("\n").append(pictureText);
+        if (o instanceof CTFtnEdnRef) {
+            CTFtnEdnRef ftn = (CTFtnEdnRef) o;
+            String footnoteRef = ftn.getDomNode().getLocalName().equals("footnoteReference") ?
+                    "[footnoteRef:" + ftn.getId().intValue() + "]" : "[endnoteRef:" + ftn.getId().intValue() + "]";
+            text.append(footnoteRef);
         }
-
-        return text.toString();
     }
 
     /**
diff --git a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
index 432e9f0..a636e20 100644
--- a/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
+++ b/src/ooxml/testcases/org/apache/poi/TestPOIXMLDocument.java
@@ -28,6 +28,7 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.lang.Thread.UncaughtExceptionHandler;
 import java.lang.reflect.InvocationTargetException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -40,6 +41,7 @@
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.NullOutputStream;
 import org.apache.poi.util.PackageHelper;
 import org.apache.poi.util.TempFile;
@@ -321,38 +323,61 @@
         }
     }
     
-    @Test(expected=IllegalStateException.class)
-    public void testOSGIClassLoadingAsIs() throws IOException {
+    @Test
+    public void testOSGIClassLoading() {
+        // the schema type loader is cached per thread in POIXMLTypeLoader.
+        // So create a new Thread and change the context class loader (which would normally be used)
+        // to not contain the OOXML classes
+        Runnable run = new Runnable() {
+            public void run() {
+                InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
+                XMLSlideShow ppt = null;
+                try {
+                    ppt = new XMLSlideShow(is);
+                    ppt.getSlides().get(0).getShapes();
+                } catch (IOException e) {
+                    fail("failed to load XMLSlideShow");
+                } finally {
+                    IOUtils.closeQuietly(ppt);
+                    IOUtils.closeQuietly(is);
+                }
+            }
+        };
+
         Thread thread = Thread.currentThread();
         ClassLoader cl = thread.getContextClassLoader();
-        InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
-        try {
-            thread.setContextClassLoader(cl.getParent());
-            XMLSlideShow ppt = new XMLSlideShow(is);
-            ppt.getSlides().get(0).getShapes();
-            ppt.close();
-        } finally {
-            thread.setContextClassLoader(cl);
-            is.close();
+        UncaughtHandler uh = new UncaughtHandler();
+        
+        // check schema type loading and check whether we could run into an OOM
+        Thread ta[] = new Thread[30];
+        for (int j=0; j<10; j++) {
+            for (int i=0; i<ta.length; i++) {
+                ta[i] = new Thread(run);
+                ta[i].setContextClassLoader(cl.getParent());
+                ta[i].setUncaughtExceptionHandler(uh);
+                ta[i].start();
+            }
+            for (int i=0; i<ta.length; i++) {
+                try {
+                    ta[i].join();
+                } catch (InterruptedException e) {
+                    fail("failed to join thread");
+                }
+            }
         }
+        assertFalse(uh.hasException());
     }
 
-
-    @Test
-    public void testOSGIClassLoadingFixed() throws IOException {
-        Thread thread = Thread.currentThread();
-        ClassLoader cl = thread.getContextClassLoader();
-        InputStream is = POIDataSamples.getSlideShowInstance().openResourceAsStream("table_test.pptx");
-        try {
-            thread.setContextClassLoader(cl.getParent());
-            POIXMLTypeLoader.setClassLoader(cl);
-            XMLSlideShow ppt = new XMLSlideShow(is);
-            ppt.getSlides().get(0).getShapes();
-            ppt.close();
-        } finally {
-            thread.setContextClassLoader(cl);
-            POIXMLTypeLoader.setClassLoader(null);
-            is.close();
+    private static class UncaughtHandler implements UncaughtExceptionHandler {
+        Throwable e;
+        
+        public synchronized void uncaughtException(Thread t, Throwable e) {
+            this.e = e;
+            
+        }
+        
+        public synchronized boolean hasException() {
+            return e != null;
         }
     }
 
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java
index 3b1fdd3..5afad20 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFFormulaEvaluation.java
@@ -694,4 +694,15 @@
         assertSame(cell, same);
         wb.close();
     }
+    
+    @Test
+    public void testBug61468() {
+        Workbook wb = XSSFTestDataSamples.openSampleWorkbook("simple-monthly-budget.xlsx");
+        FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();
+        Cell cell = wb.getSheetAt(0).getRow(8).getCell(4);
+        assertEquals(3750, cell.getNumericCellValue(), 0.001);
+
+        CellValue value = evaluator.evaluate(cell);
+        assertEquals(3750, value.getNumberValue(), 0.001);
+    }
 }
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java
index 6b916b1..06ef68a 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFTable.java
@@ -163,7 +163,7 @@
     public void isHasTotalsRow() throws IOException {
         XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("StructuredReferences.xlsx");
         XSSFTable table = wb.getTable("\\_Prime.1");
-        assertFalse(table.isHasTotalsRow());
+        assertFalse(table.getTotalsRowCount() > 0);
         wb.close(); 
     }
 
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index b83b27d..b7d0b03 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -421,4 +421,16 @@
                 extractor.getText());
         extractor.close();
     }
+
+    public void testPhonetic() throws IOException {
+        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("61470.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+        //expect: baseText (phoneticText)
+        assertEquals("\u6771\u4EAC (\u3068\u3046\u304D\u3087\u3046)", extractor.getText().trim());
+        extractor.close();
+        extractor = new XWPFWordExtractor(doc);
+        extractor.setConcatenatePhoneticRuns(false);
+        assertEquals("\u6771\u4EAC", extractor.getText().trim());
+    }
+
 }
diff --git a/test-data/document/61470.docx b/test-data/document/61470.docx
new file mode 100644
index 0000000..6fc1afe
--- /dev/null
+++ b/test-data/document/61470.docx
Binary files differ
diff --git a/test-data/spreadsheet/simple-monthly-budget.xlsx b/test-data/spreadsheet/simple-monthly-budget.xlsx
new file mode 100644
index 0000000..b8613cd
--- /dev/null
+++ b/test-data/spreadsheet/simple-monthly-budget.xlsx
Binary files differ