| /* ==================================================================== |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==================================================================== */ |
| |
| package org.apache.poi.xssf.usermodel; |
| |
| import java.util.*; |
| import java.util.regex.Pattern; |
| import java.util.regex.Matcher; |
| |
| import javax.xml.namespace.QName; |
| |
| import org.apache.poi.ss.usermodel.Font; |
| import org.apache.poi.ss.usermodel.RichTextString; |
| import org.apache.poi.xssf.model.StylesTable; |
| import org.apache.poi.xssf.model.ThemesTable; |
| import org.apache.poi.util.Internal; |
| import org.apache.xmlbeans.XmlCursor; |
| import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColor; |
| import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFont; |
| import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRElt; |
| import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRPrElt; |
| import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; |
| import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring; |
| |
| |
| /** |
| * Rich text unicode string. These strings can have fonts applied to arbitary parts of the string. |
| * |
| * <p> |
| * Most strings in a workbook have formatting applied at the cell level, that is, the entire string in the cell has the |
| * same formatting applied. In these cases, the formatting for the cell is stored in the styles part, |
| * and the string for the cell can be shared across the workbook. The following code illustrates the example. |
| * </p> |
| * |
| * <blockquote> |
| * <pre> |
| * cell1.setCellValue(new XSSFRichTextString("Apache POI")); |
| * cell2.setCellValue(new XSSFRichTextString("Apache POI")); |
| * cell3.setCellValue(new XSSFRichTextString("Apache POI")); |
| * </pre> |
| * </blockquote> |
| * In the above example all three cells will use the same string cached on workbook level. |
| * |
| * <p> |
| * Some strings in the workbook may have formatting applied at a level that is more granular than the cell level. |
| * For instance, specific characters within the string may be bolded, have coloring, italicizing, etc. |
| * In these cases, the formatting is stored along with the text in the string table, and is treated as |
| * a unique entry in the workbook. The following xml and code snippet illustrate this. |
| * </p> |
| * |
| * <blockquote> |
| * <pre> |
| * XSSFRichTextString s1 = new XSSFRichTextString("Apache POI"); |
| * s1.applyFont(boldArial); |
| * cell1.setCellValue(s1); |
| * |
| * XSSFRichTextString s2 = new XSSFRichTextString("Apache POI"); |
| * s2.applyFont(italicCourier); |
| * cell2.setCellValue(s2); |
| * </pre> |
| * </blockquote> |
| */ |
| public class XSSFRichTextString implements RichTextString { |
| private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-Fa-f]{4})_"); |
| |
| private CTRst st; |
| private StylesTable styles; |
| |
| /** |
| * Create a rich text string |
| */ |
| public XSSFRichTextString(String str) { |
| st = CTRst.Factory.newInstance(); |
| st.setT(str); |
| preserveSpaces(st.xgetT()); |
| } |
| |
| /** |
| * Create empty rich text string and initialize it with empty string |
| */ |
| public XSSFRichTextString() { |
| st = CTRst.Factory.newInstance(); |
| } |
| |
| /** |
| * Create a rich text string from the supplied XML bean |
| */ |
| public XSSFRichTextString(CTRst st) { |
| this.st = st; |
| } |
| |
| /** |
| * Applies a font to the specified characters of a string. |
| * |
| * @param startIndex The start index to apply the font to (inclusive) |
| * @param endIndex The end index to apply the font to (exclusive) |
| * @param fontIndex The font to use. |
| */ |
| public void applyFont(int startIndex, int endIndex, short fontIndex) { |
| XSSFFont font; |
| if(styles == null) { |
| //style table is not set, remember fontIndex and set the run properties later, |
| //when setStylesTableReference is called |
| font = new XSSFFont(); |
| font.setFontName("#" + fontIndex); |
| } else { |
| font = styles.getFontAt(fontIndex); |
| } |
| applyFont(startIndex, endIndex, font); |
| } |
| |
| /** |
| * Applies a font to the specified characters of a string. |
| * |
| * @param startIndex The start index to apply the font to (inclusive) |
| * @param endIndex The end index to apply to font to (exclusive) |
| * @param font The index of the font to use. |
| */ |
| public void applyFont(int startIndex, int endIndex, Font font) { |
| if (startIndex > endIndex) |
| throw new IllegalArgumentException("Start index must be less than end index, but had " + startIndex + " and " + endIndex); |
| if (startIndex < 0 || endIndex > length()) |
| throw new IllegalArgumentException("Start and end index not in range, but had " + startIndex + " and " + endIndex); |
| |
| if (startIndex == endIndex) |
| return; |
| |
| if(st.sizeOfRArray() == 0 && st.isSetT()) { |
| //convert <t>string</t> into a text run: <r><t>string</t></r> |
| st.addNewR().setT(st.getT()); |
| st.unsetT(); |
| } |
| |
| String text = getString(); |
| XSSFFont xssfFont = (XSSFFont)font; |
| |
| TreeMap<Integer, CTRPrElt> formats = getFormatMap(st); |
| CTRPrElt fmt = CTRPrElt.Factory.newInstance(); |
| setRunAttributes(xssfFont.getCTFont(), fmt); |
| applyFont(formats, startIndex, endIndex, fmt); |
| |
| CTRst newSt = buildCTRst(text, formats); |
| st.set(newSt); |
| } |
| |
| /** |
| * Sets the font of the entire string. |
| * @param font The font to use. |
| */ |
| public void applyFont(Font font) { |
| String text = getString(); |
| applyFont(0, text.length(), font); |
| } |
| |
| /** |
| * Applies the specified font to the entire string. |
| * |
| * @param fontIndex the font to apply. |
| */ |
| public void applyFont(short fontIndex) { |
| XSSFFont font; |
| if(styles == null) { |
| font = new XSSFFont(); |
| font.setFontName("#" + fontIndex); |
| } else { |
| font = styles.getFontAt(fontIndex); |
| } |
| String text = getString(); |
| applyFont(0, text.length(), font); |
| } |
| |
| /** |
| * Append new text to this text run and apply the specify font to it |
| * |
| * @param text the text to append |
| * @param font the font to apply to the appended text or <code>null</code> if no formatting is required |
| */ |
| public void append(String text, XSSFFont font){ |
| if(st.sizeOfRArray() == 0 && st.isSetT()) { |
| //convert <t>string</t> into a text run: <r><t>string</t></r> |
| CTRElt lt = st.addNewR(); |
| lt.setT(st.getT()); |
| preserveSpaces(lt.xgetT()); |
| st.unsetT(); |
| } |
| CTRElt lt = st.addNewR(); |
| lt.setT(text); |
| preserveSpaces(lt.xgetT()); |
| |
| if (font != null) { |
| CTRPrElt pr = lt.addNewRPr(); |
| setRunAttributes(font.getCTFont(), pr); |
| } |
| } |
| |
| /** |
| * Append new text to this text run |
| * |
| * @param text the text to append |
| */ |
| public void append(String text){ |
| append(text, null); |
| } |
| |
| /** |
| * Copy font attributes from CTFont bean into CTRPrElt bean |
| */ |
| private void setRunAttributes(CTFont ctFont, CTRPrElt pr){ |
| if(ctFont.sizeOfBArray() > 0) pr.addNewB().setVal(ctFont.getBArray(0).getVal()); |
| if(ctFont.sizeOfUArray() > 0) pr.addNewU().setVal(ctFont.getUArray(0).getVal()); |
| if(ctFont.sizeOfIArray() > 0) pr.addNewI().setVal(ctFont.getIArray(0).getVal()); |
| if(ctFont.sizeOfColorArray() > 0) { |
| CTColor c1 = ctFont.getColorArray(0); |
| CTColor c2 = pr.addNewColor(); |
| if(c1.isSetAuto()) c2.setAuto(c1.getAuto()); |
| if(c1.isSetIndexed()) c2.setIndexed(c1.getIndexed()); |
| if(c1.isSetRgb()) c2.setRgb(c1.getRgb()); |
| if(c1.isSetTheme()) c2.setTheme(c1.getTheme()); |
| if(c1.isSetTint()) c2.setTint(c1.getTint()); |
| } |
| if(ctFont.sizeOfSzArray() > 0) pr.addNewSz().setVal(ctFont.getSzArray(0).getVal()); |
| if(ctFont.sizeOfNameArray() > 0) pr.addNewRFont().setVal(ctFont.getNameArray(0).getVal()); |
| if(ctFont.sizeOfFamilyArray() > 0) pr.addNewFamily().setVal(ctFont.getFamilyArray(0).getVal()); |
| if(ctFont.sizeOfSchemeArray() > 0) pr.addNewScheme().setVal(ctFont.getSchemeArray(0).getVal()); |
| if(ctFont.sizeOfCharsetArray() > 0) pr.addNewCharset().setVal(ctFont.getCharsetArray(0).getVal()); |
| if(ctFont.sizeOfCondenseArray() > 0) pr.addNewCondense().setVal(ctFont.getCondenseArray(0).getVal()); |
| if(ctFont.sizeOfExtendArray() > 0) pr.addNewExtend().setVal(ctFont.getExtendArray(0).getVal()); |
| if(ctFont.sizeOfVertAlignArray() > 0) pr.addNewVertAlign().setVal(ctFont.getVertAlignArray(0).getVal()); |
| if(ctFont.sizeOfOutlineArray() > 0) pr.addNewOutline().setVal(ctFont.getOutlineArray(0).getVal()); |
| if(ctFont.sizeOfShadowArray() > 0) pr.addNewShadow().setVal(ctFont.getShadowArray(0).getVal()); |
| if(ctFont.sizeOfStrikeArray() > 0) pr.addNewStrike().setVal(ctFont.getStrikeArray(0).getVal()); |
| } |
| |
| /** |
| * Does this string have any explicit formatting applied, or is |
| * it just text in the default style? |
| */ |
| public boolean hasFormatting() { |
| //noinspection deprecation - for performance reasons! |
| CTRElt[] rs = st.getRArray(); |
| if (rs == null || rs.length == 0) { |
| return false; |
| } |
| for (CTRElt r : rs) { |
| if (r.isSetRPr()) return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Removes any formatting that may have been applied to the string. |
| */ |
| public void clearFormatting() { |
| String text = getString(); |
| st.setRArray(null); |
| st.setT(text); |
| } |
| |
| /** |
| * The index within the string to which the specified formatting run applies. |
| * |
| * @param index the index of the formatting run |
| * @return the index within the string. |
| */ |
| public int getIndexOfFormattingRun(int index) { |
| if(st.sizeOfRArray() == 0) return 0; |
| |
| int pos = 0; |
| for(int i = 0; i < st.sizeOfRArray(); i++){ |
| CTRElt r = st.getRArray(i); |
| if(i == index) return pos; |
| |
| pos += r.getT().length(); |
| } |
| return -1; |
| } |
| |
| /** |
| * Returns the number of characters this format run covers. |
| * |
| * @param index the index of the formatting run |
| * @return the number of characters this format run covers |
| */ |
| public int getLengthOfFormattingRun(int index) { |
| if(st.sizeOfRArray() == 0 || index >= st.sizeOfRArray()) { |
| return -1; |
| } |
| |
| CTRElt r = st.getRArray(index); |
| return r.getT().length(); |
| } |
| |
| /** |
| * Returns the plain string representation. |
| */ |
| public String getString() { |
| if(st.sizeOfRArray() == 0) { |
| return utfDecode(st.getT()); |
| } |
| StringBuilder buf = new StringBuilder(); |
| //noinspection deprecation - for performance reasons! |
| for(CTRElt r : st.getRArray()){ |
| buf.append(r.getT()); |
| } |
| return utfDecode(buf.toString()); |
| } |
| |
| /** |
| * Removes any formatting and sets new string value |
| * |
| * @param s new string value |
| */ |
| public void setString(String s){ |
| clearFormatting(); |
| st.setT(s); |
| preserveSpaces(st.xgetT()); |
| } |
| |
| /** |
| * Returns the plain string representation. |
| */ |
| public String toString() { |
| return getString(); |
| } |
| |
| /** |
| * Returns the number of characters in this string. |
| */ |
| public int length() { |
| return getString().length(); |
| } |
| |
| /** |
| * @return The number of formatting runs used. |
| */ |
| public int numFormattingRuns() { |
| return st.sizeOfRArray(); |
| } |
| |
| /** |
| * Gets a copy of the font used in a particular formatting run. |
| * |
| * @param index the index of the formatting run |
| * @return A copy of the font used or null if no formatting is applied to the specified text run. |
| */ |
| public XSSFFont getFontOfFormattingRun(int index) { |
| if(st.sizeOfRArray() == 0 || index >= st.sizeOfRArray()) { |
| return null; |
| } |
| |
| CTRElt r = st.getRArray(index); |
| if(r.getRPr() != null) { |
| XSSFFont fnt = new XSSFFont(toCTFont(r.getRPr())); |
| fnt.setThemesTable(getThemesTable()); |
| return fnt; |
| } |
| |
| return null; |
| } |
| |
| /** |
| * Return a copy of the font in use at a particular index. |
| * |
| * @param index The index. |
| * @return A copy of the font that's currently being applied at that |
| * index or null if no font is being applied or the |
| * index is out of range. |
| */ |
| public XSSFFont getFontAtIndex( int index ) { |
| final ThemesTable themes = getThemesTable(); |
| int pos = 0; |
| //noinspection deprecation - for performance reasons! |
| for(CTRElt r : st.getRArray()){ |
| final int length = r.getT().length(); |
| if(index >= pos && index < pos + length) { |
| XSSFFont fnt = new XSSFFont(toCTFont(r.getRPr())); |
| fnt.setThemesTable(themes); |
| return fnt; |
| } |
| |
| pos += length; |
| } |
| return null; |
| |
| } |
| |
| /** |
| * Return the underlying xml bean |
| */ |
| @Internal |
| public CTRst getCTRst() { |
| return st; |
| } |
| |
| protected void setStylesTableReference(StylesTable tbl){ |
| styles = tbl; |
| if(st.sizeOfRArray() > 0) { |
| //noinspection deprecation - for performance reasons! |
| for (CTRElt r : st.getRArray()) { |
| CTRPrElt pr = r.getRPr(); |
| if(pr != null && pr.sizeOfRFontArray() > 0){ |
| String fontName = pr.getRFontArray(0).getVal(); |
| if(fontName.startsWith("#")){ |
| int idx = Integer.parseInt(fontName.substring(1)); |
| XSSFFont font = styles.getFontAt(idx); |
| pr.removeRFont(0); |
| setRunAttributes(font.getCTFont(), pr); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * |
| * CTRPrElt --> CTFont adapter |
| */ |
| protected static CTFont toCTFont(CTRPrElt pr){ |
| CTFont ctFont = CTFont.Factory.newInstance(); |
| |
| // Bug 58315: there are files where there is no pr-entry for a RichTextString |
| if(pr == null) { |
| return ctFont; |
| } |
| |
| if(pr.sizeOfBArray() > 0) ctFont.addNewB().setVal(pr.getBArray(0).getVal()); |
| if(pr.sizeOfUArray() > 0) ctFont.addNewU().setVal(pr.getUArray(0).getVal()); |
| if(pr.sizeOfIArray() > 0) ctFont.addNewI().setVal(pr.getIArray(0).getVal()); |
| if(pr.sizeOfColorArray() > 0) { |
| CTColor c1 = pr.getColorArray(0); |
| CTColor c2 = ctFont.addNewColor(); |
| if(c1.isSetAuto()) c2.setAuto(c1.getAuto()); |
| if(c1.isSetIndexed()) c2.setIndexed(c1.getIndexed()); |
| if(c1.isSetRgb()) c2.setRgb(c1.getRgb()); |
| if(c1.isSetTheme()) c2.setTheme(c1.getTheme()); |
| if(c1.isSetTint()) c2.setTint(c1.getTint()); |
| } |
| if(pr.sizeOfSzArray() > 0) ctFont.addNewSz().setVal(pr.getSzArray(0).getVal()); |
| if(pr.sizeOfRFontArray() > 0) ctFont.addNewName().setVal(pr.getRFontArray(0).getVal()); |
| if(pr.sizeOfFamilyArray() > 0) ctFont.addNewFamily().setVal(pr.getFamilyArray(0).getVal()); |
| if(pr.sizeOfSchemeArray() > 0) ctFont.addNewScheme().setVal(pr.getSchemeArray(0).getVal()); |
| if(pr.sizeOfCharsetArray() > 0) ctFont.addNewCharset().setVal(pr.getCharsetArray(0).getVal()); |
| if(pr.sizeOfCondenseArray() > 0) ctFont.addNewCondense().setVal(pr.getCondenseArray(0).getVal()); |
| if(pr.sizeOfExtendArray() > 0) ctFont.addNewExtend().setVal(pr.getExtendArray(0).getVal()); |
| if(pr.sizeOfVertAlignArray() > 0) ctFont.addNewVertAlign().setVal(pr.getVertAlignArray(0).getVal()); |
| if(pr.sizeOfOutlineArray() > 0) ctFont.addNewOutline().setVal(pr.getOutlineArray(0).getVal()); |
| if(pr.sizeOfShadowArray() > 0) ctFont.addNewShadow().setVal(pr.getShadowArray(0).getVal()); |
| if(pr.sizeOfStrikeArray() > 0) ctFont.addNewStrike().setVal(pr.getStrikeArray(0).getVal()); |
| |
| return ctFont; |
| } |
| |
| /** |
| * Add the xml:spaces="preserve" attribute if the string has leading or trailing spaces |
| * |
| * @param xs the string to check |
| */ |
| protected static void preserveSpaces(STXstring xs) { |
| String text = xs.getStringValue(); |
| if (text != null && text.length() > 0) { |
| char firstChar = text.charAt(0); |
| char lastChar = text.charAt(text.length() - 1); |
| if(Character.isWhitespace(firstChar) || Character.isWhitespace(lastChar)) { |
| XmlCursor c = xs.newCursor(); |
| c.toNextToken(); |
| c.insertAttributeWithValue(new QName("http://www.w3.org/XML/1998/namespace", "space"), "preserve"); |
| c.dispose(); |
| } |
| } |
| } |
| |
| /** |
| * For all characters which cannot be represented in XML as defined by the XML 1.0 specification, |
| * the characters are escaped using the Unicode numerical character representation escape character |
| * format _xHHHH_, where H represents a hexadecimal character in the character's value. |
| * <p> |
| * Example: The Unicode character 0D is invalid in an XML 1.0 document, |
| * so it shall be escaped as <code>_x000D_</code>. |
| * </p> |
| * See section 3.18.9 in the OOXML spec. |
| * |
| * @param value the string to decode |
| * @return the decoded string |
| */ |
| static String utfDecode(String value){ |
| if(value == null || !value.contains("_x")) { |
| return value; |
| } |
| |
| StringBuilder buf = new StringBuilder(); |
| Matcher m = utfPtrn.matcher(value); |
| int idx = 0; |
| while(m.find()) { |
| int pos = m.start(); |
| if( pos > idx) { |
| buf.append(value.substring(idx, pos)); |
| } |
| |
| String code = m.group(1); |
| int icode = Integer.decode("0x" + code); |
| buf.append((char)icode); |
| |
| idx = m.end(); |
| } |
| |
| // small optimization: don't go via StringBuilder if not necessary, |
| // the encodings are very rare, so we should almost always go via this shortcut. |
| if(idx == 0) { |
| return value; |
| } |
| |
| buf.append(value.substring(idx)); |
| return buf.toString(); |
| } |
| |
| void applyFont(TreeMap<Integer, CTRPrElt> formats, int startIndex, int endIndex, CTRPrElt fmt) { |
| // delete format runs that fit between startIndex and endIndex |
| // runs intersecting startIndex and endIndex remain |
| int runStartIdx = 0; |
| for (Iterator<Integer> it = formats.keySet().iterator(); it.hasNext();) { |
| int runEndIdx = it.next(); |
| if (runStartIdx >= startIndex && runEndIdx < endIndex) { |
| it.remove(); |
| } |
| runStartIdx = runEndIdx; |
| } |
| |
| if(startIndex > 0 && !formats.containsKey(startIndex)) { |
| // If there's a format that starts later in the string, make it start now |
| for(Map.Entry<Integer, CTRPrElt> entry : formats.entrySet()) { |
| if(entry.getKey() > startIndex) { |
| formats.put(startIndex, entry.getValue()); |
| break; |
| } |
| } |
| } |
| formats.put(endIndex, fmt); |
| |
| // assure that the range [startIndex, endIndex] consists if a single run |
| // there can be two or three runs depending whether startIndex or endIndex |
| // intersected existing format runs |
| SortedMap<Integer, CTRPrElt> sub = formats.subMap(startIndex, endIndex); |
| while(sub.size() > 1) sub.remove(sub.lastKey()); |
| } |
| |
| TreeMap<Integer, CTRPrElt> getFormatMap(CTRst entry){ |
| int length = 0; |
| TreeMap<Integer, CTRPrElt> formats = new TreeMap<Integer, CTRPrElt>(); |
| //noinspection deprecation - for performance reasons! |
| for (CTRElt r : entry.getRArray()) { |
| String txt = r.getT(); |
| CTRPrElt fmt = r.getRPr(); |
| |
| length += txt.length(); |
| formats.put(length, fmt); |
| } |
| return formats; |
| } |
| |
| CTRst buildCTRst(String text, TreeMap<Integer, CTRPrElt> formats){ |
| if(text.length() != formats.lastKey()) { |
| throw new IllegalArgumentException("Text length was " + text.length() + |
| " but the last format index was " + formats.lastKey()); |
| } |
| CTRst stf = CTRst.Factory.newInstance(); |
| int runStartIdx = 0; |
| for (Map.Entry<Integer, CTRPrElt> me : formats.entrySet()) { |
| int runEndIdx = me.getKey(); |
| CTRElt run = stf.addNewR(); |
| String fragment = text.substring(runStartIdx, runEndIdx); |
| run.setT(fragment); |
| preserveSpaces(run.xgetT()); |
| |
| CTRPrElt fmt = me.getValue(); |
| if (fmt != null) { |
| run.setRPr(fmt); |
| } |
| runStartIdx = runEndIdx; |
| } |
| return stf; |
| } |
| |
| private ThemesTable getThemesTable() { |
| if(styles == null) return null; |
| return styles.getTheme(); |
| } |
| } |