| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.fo; |
| |
| import java.awt.Color; |
| import java.nio.CharBuffer; |
| import java.util.NoSuchElementException; |
| |
| import org.xml.sax.Locator; |
| |
| import org.apache.fop.apps.FOPException; |
| import org.apache.fop.datatypes.Length; |
| import org.apache.fop.fo.flow.Block; |
| import org.apache.fop.fo.properties.CommonFont; |
| import org.apache.fop.fo.properties.CommonHyphenation; |
| import org.apache.fop.fo.properties.CommonTextDecoration; |
| import org.apache.fop.fo.properties.KeepProperty; |
| import org.apache.fop.fo.properties.Property; |
| import org.apache.fop.fo.properties.SpaceProperty; |
| import org.apache.fop.util.CharUtilities; |
| |
| /** |
| * A text node (PCDATA) in the formatting object tree. |
| */ |
| public class FOText extends FONode implements CharSequence { |
| |
| /** the <code>CharBuffer</code> containing the text */ |
| private CharBuffer charBuffer; |
| |
| /** properties relevant for #PCDATA */ |
| private CommonFont commonFont; |
| private CommonHyphenation commonHyphenation; |
| private Color color; |
| private KeepProperty keepTogether; |
| private Property letterSpacing; |
| private SpaceProperty lineHeight; |
| private int whiteSpaceTreatment; |
| private int whiteSpaceCollapse; |
| private int textTransform; |
| private Property wordSpacing; |
| private int wrapOption; |
| private Length baselineShift; |
| |
| /** |
| * Points to the previous FOText object created within the current |
| * block. If this is "null", this is the first such object. |
| */ |
| private FOText prevFOTextThisBlock = null; |
| |
| /** |
| * Points to the next FOText object created within the current |
| * block. If this is "null", this is the last such object. |
| */ |
| private FOText nextFOTextThisBlock = null; |
| |
| /** |
| * Points to the ancestor Block object. This is used to keep track of |
| * which FOText nodes are descendants of the same block. |
| */ |
| private Block ancestorBlock = null; |
| |
| /** Holds the text decoration values. May be null */ |
| private CommonTextDecoration textDecoration; |
| |
| private static final int IS_WORD_CHAR_FALSE = 0; |
| private static final int IS_WORD_CHAR_TRUE = 1; |
| private static final int IS_WORD_CHAR_MAYBE = 2; |
| |
| /** |
| * Creates a new FO text node. |
| * |
| * @param parent FONode that is the parent of this object |
| */ |
| public FOText(FONode parent) { |
| super(parent); |
| } |
| |
| /** {@inheritDoc} */ |
| protected void characters(char[] data, int start, int length, |
| PropertyList list, Locator locator) throws FOPException { |
| |
| if (this.charBuffer == null) { |
| // buffer not yet initialized, do so now |
| this.charBuffer = CharBuffer.allocate(length); |
| } else { |
| // allocate a larger buffer, and transfer contents |
| int newLength = this.charBuffer.limit() + length; |
| CharBuffer newBuffer = CharBuffer.allocate(newLength); |
| this.charBuffer.rewind(); |
| newBuffer.put(this.charBuffer); |
| this.charBuffer = newBuffer; |
| } |
| // append characters |
| this.charBuffer.put(data, start, length); |
| |
| } |
| |
| /** |
| * Return the array of characters for this instance. |
| * |
| * @return a char array containing the text |
| */ |
| public char[] getCharArray() { |
| |
| if (this.charBuffer == null) { |
| return null; |
| } |
| |
| if (this.charBuffer.hasArray()) { |
| return this.charBuffer.array(); |
| } |
| |
| // only if the buffer implementation has |
| // no accessible backing array, return a new one |
| char[] ca = new char[this.charBuffer.limit()]; |
| this.charBuffer.rewind(); |
| this.charBuffer.get(ca); |
| return ca; |
| |
| } |
| |
| /** {@inheritDoc} */ |
| public FONode clone(FONode parent, boolean removeChildren) |
| throws FOPException { |
| FOText ft = (FOText) super.clone(parent, removeChildren); |
| if (removeChildren) { |
| // not really removing, just make sure the char buffer |
| // pointed to is really a different one |
| if (this.charBuffer != null) { |
| ft.charBuffer = CharBuffer.allocate(this.charBuffer.limit()); |
| this.charBuffer.rewind(); |
| ft.charBuffer.put(this.charBuffer); |
| ft.charBuffer.rewind(); |
| } |
| } |
| ft.prevFOTextThisBlock = null; |
| ft.nextFOTextThisBlock = null; |
| ft.ancestorBlock = null; |
| return ft; |
| } |
| |
| /** {@inheritDoc} */ |
| public void bind(PropertyList pList) throws FOPException { |
| this.commonFont = pList.getFontProps(); |
| this.commonHyphenation = pList.getHyphenationProps(); |
| this.color = pList.get(Constants.PR_COLOR).getColor(getUserAgent()); |
| this.keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep(); |
| this.lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace(); |
| this.letterSpacing = pList.get(Constants.PR_LETTER_SPACING); |
| this.whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum(); |
| this.whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum(); |
| this.textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum(); |
| this.wordSpacing = pList.get(Constants.PR_WORD_SPACING); |
| this.wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum(); |
| this.textDecoration = pList.getTextDecorationProps(); |
| this.baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength(); |
| } |
| |
| /** {@inheritDoc} */ |
| protected void endOfNode() throws FOPException { |
| super.endOfNode(); |
| getFOEventHandler().characters( |
| this.getCharArray(), 0, this.charBuffer.limit()); |
| } |
| |
| /** {@inheritDoc} */ |
| public void finalizeNode() { |
| textTransform(); |
| } |
| |
| /** |
| * Check if this text node will create an area. |
| * This means either there is non-whitespace or it is |
| * preserved whitespace. |
| * Maybe this just needs to check length > 0, since char iterators |
| * handle whitespace. |
| * |
| * @return true if this will create an area in the output |
| */ |
| public boolean willCreateArea() { |
| if (whiteSpaceCollapse == Constants.EN_FALSE |
| && this.charBuffer.limit() > 0) { |
| return true; |
| } |
| |
| char ch; |
| this.charBuffer.rewind(); |
| while (this.charBuffer.hasRemaining()) { |
| ch = this.charBuffer.get(); |
| if (!((ch == CharUtilities.SPACE) |
| || (ch == CharUtilities.LINEFEED_CHAR) |
| || (ch == CharUtilities.CARRIAGE_RETURN) |
| || (ch == CharUtilities.TAB))) { |
| // not whitespace |
| this.charBuffer.rewind(); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * @return a new TextCharIterator |
| */ |
| public CharIterator charIterator() { |
| return new TextCharIterator(); |
| } |
| |
| /** |
| * This method is run as part of the ancestor Block's flushText(), to |
| * create xref pointers to the previous FOText objects within the same Block |
| * @param ancestorBlock the ancestor fo:block |
| */ |
| protected void createBlockPointers(Block ancestorBlock) { |
| this.ancestorBlock = ancestorBlock; |
| // if the last FOText is a sibling, point to it, and have it point here |
| if (ancestorBlock.lastFOTextProcessed != null) { |
| if (ancestorBlock.lastFOTextProcessed.ancestorBlock |
| == this.ancestorBlock) { |
| prevFOTextThisBlock = ancestorBlock.lastFOTextProcessed; |
| prevFOTextThisBlock.nextFOTextThisBlock = this; |
| } else { |
| prevFOTextThisBlock = null; |
| } |
| } |
| } |
| |
| /** |
| * This method is run as part of endOfNode(), to handle the |
| * text-transform property for accumulated FOText |
| */ |
| private void textTransform() { |
| if (getBuilderContext().inMarker() |
| || textTransform == Constants.EN_NONE) { |
| return; |
| } |
| |
| this.charBuffer.rewind(); |
| CharBuffer tmp = this.charBuffer.slice(); |
| char c; |
| int lim = this.charBuffer.limit(); |
| int pos = -1; |
| while (++pos < lim) { |
| c = this.charBuffer.get(); |
| switch (textTransform) { |
| case Constants.EN_UPPERCASE: |
| tmp.put(Character.toUpperCase(c)); |
| break; |
| case Constants.EN_LOWERCASE: |
| tmp.put(Character.toLowerCase(c)); |
| break; |
| case Constants.EN_CAPITALIZE: |
| if (isStartOfWord(pos)) { |
| /* |
| Use toTitleCase here. Apparently, some languages use |
| a different character to represent a letter when using |
| initial caps than when all of the letters in the word |
| are capitalized. We will try to let Java handle this. |
| */ |
| tmp.put(Character.toTitleCase(c)); |
| } else { |
| tmp.put(c); |
| } |
| break; |
| default: |
| //should never happen as the property subsystem catches that case |
| assert false; |
| //nop |
| } |
| } |
| } |
| |
| /** |
| * Determines whether a particular location in an FOText object's text is |
| * the start of a new "word". The use of "word" here is specifically for |
| * the text-transform property, but may be useful for other things as |
| * well, such as word-spacing. The definition of "word" is somewhat ambiguous |
| * and appears to be definable by the user agent. |
| * |
| * @param i index into charBuffer |
| * |
| * @return True if the character at this location is the start of a new |
| * word. |
| */ |
| private boolean isStartOfWord(int i) { |
| char prevChar = getRelativeCharInBlock(i, -1); |
| /* All we are really concerned about here is of what type prevChar |
| * is. If inputChar is not part of a word, then the Java |
| * conversions will (we hope) simply return inputChar. |
| */ |
| switch (isWordChar(prevChar)) { |
| case IS_WORD_CHAR_TRUE: |
| return false; |
| case IS_WORD_CHAR_FALSE: |
| return true; |
| /* "MAYBE" implies that additional context is needed. An example is a |
| * single-quote, either straight or closing, which might be interpreted |
| * as a possessive or a contraction, or might be a closing quote. |
| */ |
| case IS_WORD_CHAR_MAYBE: |
| char prevPrevChar = getRelativeCharInBlock(i, -2); |
| switch (isWordChar(prevPrevChar)) { |
| case IS_WORD_CHAR_TRUE: |
| return false; |
| case IS_WORD_CHAR_FALSE: |
| return true; |
| case IS_WORD_CHAR_MAYBE: |
| return true; |
| default: |
| return false; |
| } |
| default: |
| return false; |
| } |
| } |
| |
| /** |
| * Finds a character within the current Block that is relative in location |
| * to a character in the current FOText. Treats all FOText objects within a |
| * block as one unit, allowing text in adjoining FOText objects to be |
| * returned if the parameters are outside of the current object. |
| * |
| * @param i index into the CharBuffer |
| * @param offset signed integer with relative position within the |
| * block of the character to return. To return the character immediately |
| * preceding i, pass -1. To return the character immediately after i, |
| * pass 1. |
| * @return the character in the offset position within the block; \u0000 if |
| * the offset points to an area outside of the block. |
| */ |
| private char getRelativeCharInBlock(int i, int offset) { |
| |
| int charIndex = i + offset; |
| // The easy case is where the desired character is in the same FOText |
| if (charIndex >= 0 && charIndex < this.length()) { |
| return this.charAt(i + offset); |
| } |
| |
| // For now, we can't look at following FOText nodes |
| if (offset > 0) { |
| return CharUtilities.NULL_CHAR; |
| } |
| |
| // Remaining case has the text in some previous FOText node |
| boolean foundChar = false; |
| char charToReturn = CharUtilities.NULL_CHAR; |
| FOText nodeToTest = this; |
| int remainingOffset = offset + i; |
| while (!foundChar) { |
| if (nodeToTest.prevFOTextThisBlock == null) { |
| break; |
| } |
| nodeToTest = nodeToTest.prevFOTextThisBlock; |
| int diff = nodeToTest.length() + remainingOffset - 1; |
| if (diff >= 0) { |
| charToReturn = nodeToTest.charAt(diff); |
| foundChar = true; |
| } else { |
| remainingOffset += diff; |
| } |
| } |
| return charToReturn; |
| } |
| |
| /** |
| * @return The previous FOText node in this Block; null, if this is the |
| * first FOText in this Block. |
| */ |
| public FOText getPrevFOTextThisBlock () { |
| return prevFOTextThisBlock; |
| } |
| |
| /** |
| * @return The next FOText node in this Block; null if this is the last |
| * FOText in this Block; null if subsequent FOText nodes have not yet been |
| * processed. |
| */ |
| public FOText getNextFOTextThisBlock () { |
| return nextFOTextThisBlock; |
| } |
| |
| /** |
| * @return The nearest ancestor block object which contains this FOText. |
| */ |
| public Block getAncestorBlock () { |
| return ancestorBlock; |
| } |
| |
| /** |
| * Determines whether the input char should be considered part of a |
| * "word". This is used primarily to determine whether the character |
| * immediately following starts a new word, but may have other uses. |
| * We have not found a definition of "word" in the standard (1.0), so the |
| * logic used here is based on the programmer's best guess. |
| * |
| * @param inputChar the character to be tested. |
| * @return int IS_WORD_CHAR_TRUE, IS_WORD_CHAR_FALSE, or IS_WORD_CHAR_MAYBE, |
| * depending on whether the character should be considered part of a word |
| * or not. |
| */ |
| private static int isWordChar(char inputChar) { |
| switch (Character.getType(inputChar)) { |
| case Character.COMBINING_SPACING_MARK: |
| return IS_WORD_CHAR_TRUE; |
| case Character.CONNECTOR_PUNCTUATION: |
| return IS_WORD_CHAR_TRUE; |
| case Character.CONTROL: |
| return IS_WORD_CHAR_FALSE; |
| case Character.CURRENCY_SYMBOL: |
| return IS_WORD_CHAR_TRUE; |
| case Character.DASH_PUNCTUATION: |
| if (inputChar == '-') { |
| return IS_WORD_CHAR_TRUE; //hyphen |
| } |
| return IS_WORD_CHAR_FALSE; |
| case Character.DECIMAL_DIGIT_NUMBER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.ENCLOSING_MARK: |
| return IS_WORD_CHAR_FALSE; |
| case Character.END_PUNCTUATION: |
| if (inputChar == '\u2019') { |
| return IS_WORD_CHAR_MAYBE; //apostrophe, right single quote |
| } |
| return IS_WORD_CHAR_FALSE; |
| case Character.FORMAT: |
| return IS_WORD_CHAR_FALSE; |
| case Character.LETTER_NUMBER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.LINE_SEPARATOR: |
| return IS_WORD_CHAR_FALSE; |
| case Character.LOWERCASE_LETTER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.MATH_SYMBOL: |
| return IS_WORD_CHAR_FALSE; |
| case Character.MODIFIER_LETTER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.MODIFIER_SYMBOL: |
| return IS_WORD_CHAR_TRUE; |
| case Character.NON_SPACING_MARK: |
| return IS_WORD_CHAR_TRUE; |
| case Character.OTHER_LETTER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.OTHER_NUMBER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.OTHER_PUNCTUATION: |
| if (inputChar == '\'') { |
| return IS_WORD_CHAR_MAYBE; //ASCII apostrophe |
| } |
| return IS_WORD_CHAR_FALSE; |
| case Character.OTHER_SYMBOL: |
| return IS_WORD_CHAR_TRUE; |
| case Character.PARAGRAPH_SEPARATOR: |
| return IS_WORD_CHAR_FALSE; |
| case Character.PRIVATE_USE: |
| return IS_WORD_CHAR_FALSE; |
| case Character.SPACE_SEPARATOR: |
| return IS_WORD_CHAR_FALSE; |
| case Character.START_PUNCTUATION: |
| return IS_WORD_CHAR_FALSE; |
| case Character.SURROGATE: |
| return IS_WORD_CHAR_FALSE; |
| case Character.TITLECASE_LETTER: |
| return IS_WORD_CHAR_TRUE; |
| case Character.UNASSIGNED: |
| return IS_WORD_CHAR_FALSE; |
| case Character.UPPERCASE_LETTER: |
| return IS_WORD_CHAR_TRUE; |
| default: |
| return IS_WORD_CHAR_FALSE; |
| } |
| } |
| |
| private class TextCharIterator extends CharIterator { |
| |
| int currentPosition = 0; |
| |
| boolean canRemove = false; |
| boolean canReplace = false; |
| |
| /** {@inheritDoc} */ |
| public boolean hasNext() { |
| return (this.currentPosition < charBuffer.limit()); |
| } |
| |
| /** {@inheritDoc} */ |
| public char nextChar() { |
| |
| if (this.currentPosition < charBuffer.limit()) { |
| this.canRemove = true; |
| this.canReplace = true; |
| return charBuffer.get(currentPosition++); |
| } else { |
| throw new NoSuchElementException(); |
| } |
| |
| } |
| |
| /** {@inheritDoc} */ |
| public void remove() { |
| |
| if (this.canRemove) { |
| charBuffer.position(currentPosition); |
| // Slice the buffer at the current position |
| CharBuffer tmp = charBuffer.slice(); |
| // Reset position to before current character |
| charBuffer.position(--currentPosition); |
| if (tmp.hasRemaining()) { |
| // Transfer any remaining characters |
| charBuffer.mark(); |
| charBuffer.put(tmp); |
| charBuffer.reset(); |
| } |
| // Decrease limit |
| charBuffer.limit(charBuffer.limit() - 1); |
| // Make sure following calls fail, unless nextChar() was called |
| this.canRemove = false; |
| } else { |
| throw new IllegalStateException(); |
| } |
| |
| } |
| |
| /** {@inheritDoc} */ |
| public void replaceChar(char c) { |
| |
| if (this.canReplace) { |
| charBuffer.put(currentPosition - 1, c); |
| } else { |
| throw new IllegalStateException(); |
| } |
| |
| } |
| |
| } |
| |
| /** |
| * @return the Common Font Properties. |
| */ |
| public CommonFont getCommonFont() { |
| return commonFont; |
| } |
| |
| /** |
| * @return the Common Hyphenation Properties. |
| */ |
| public CommonHyphenation getCommonHyphenation() { |
| return commonHyphenation; |
| } |
| |
| /** |
| * @return the "color" property. |
| */ |
| public Color getColor() { |
| return color; |
| } |
| |
| /** |
| * @return the "keep-together" property. |
| */ |
| public KeepProperty getKeepTogether() { |
| return keepTogether; |
| } |
| |
| /** |
| * @return the "letter-spacing" property. |
| */ |
| public Property getLetterSpacing() { |
| return letterSpacing; |
| } |
| |
| /** |
| * @return the "line-height" property. |
| */ |
| public SpaceProperty getLineHeight() { |
| return lineHeight; |
| } |
| |
| /** |
| * @return the "white-space-treatment" property |
| */ |
| public int getWhitespaceTreatment() { |
| return whiteSpaceTreatment; |
| } |
| |
| /** |
| * @return the "word-spacing" property. |
| */ |
| public Property getWordSpacing() { |
| return wordSpacing; |
| } |
| |
| /** |
| * @return the "wrap-option" property. |
| */ |
| public int getWrapOption() { |
| return wrapOption; |
| } |
| |
| /** @return the "text-decoration" property. */ |
| public CommonTextDecoration getTextDecoration() { |
| return textDecoration; |
| } |
| |
| /** @return the baseline-shift property */ |
| public Length getBaseLineShift() { |
| return baselineShift; |
| } |
| |
| /** {@inheritDoc} */ |
| public String toString() { |
| return (this.charBuffer == null) ? "" : this.charBuffer.toString(); |
| } |
| |
| /** {@inheritDoc} */ |
| public String getLocalName() { |
| return "#PCDATA"; |
| } |
| |
| /** {@inheritDoc} */ |
| public String getNormalNamespacePrefix() { |
| return null; |
| } |
| |
| /** {@inheritDoc} */ |
| protected String gatherContextInfo() { |
| if (this.locator != null) { |
| return super.gatherContextInfo(); |
| } else { |
| return this.toString(); |
| } |
| } |
| |
| /** {@inheritDoc} */ |
| public char charAt(int position) { |
| return this.charBuffer.get(position); |
| } |
| |
| /** {@inheritDoc} */ |
| public CharSequence subSequence(int start, int end) { |
| return this.charBuffer.subSequence(start, end); |
| } |
| |
| /** {@inheritDoc} */ |
| public int length() { |
| return this.charBuffer.limit(); |
| } |
| |
| /** |
| * Resets the backing <code>java.nio.CharBuffer</code> |
| */ |
| public void resetBuffer() { |
| if (this.charBuffer != null) { |
| this.charBuffer.rewind(); |
| } |
| } |
| } |