| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.fo; |
| |
| import java.util.List; |
| import org.apache.fop.fo.flow.Block; |
| import org.apache.fop.fo.flow.Character; |
| import org.apache.fop.util.CharUtilities; |
| |
| /** |
| * Class encapsulating the functionality for white-space-handling |
| * during refinement stage. |
| * |
| */ |
| public class XMLWhiteSpaceHandler { |
| |
| // True if we are in a run of white space |
| private boolean inWhiteSpace = false; |
| // True if the last char was a linefeed |
| private boolean afterLinefeed = true; |
| // Counter, increased every time a non-white-space is encountered |
| private int nonWhiteSpaceCount; |
| |
| private Block currentBlock; |
| private FObj currentFO; |
| private int linefeedTreatment; |
| private int whiteSpaceTreatment; |
| private int whiteSpaceCollapse; |
| private FONode nextChild; |
| private boolean endOfBlock; |
| private boolean nextChildIsBlockLevel; |
| private RecursiveCharIterator charIter; |
| |
| private List discardableFOCharacters; |
| private List pendingInlines; |
| private CharIterator firstWhiteSpaceInSeq; |
| |
| /** |
| * Marks a Character object as discardable, so that it is effectively |
| * removed from the FOTree at the end of handleWhitespace() |
| * @param foChar the Character object to be removed from the list of |
| * childNodes |
| */ |
| public void addDiscardableFOChar(Character foChar) { |
| if (discardableFOCharacters == null) { |
| discardableFOCharacters = new java.util.ArrayList(); |
| } |
| discardableFOCharacters.add(foChar); |
| } |
| |
| /** |
| * Handle white-space for the fo that is passed in, starting at |
| * firstTextNode |
| * @param fo the FO for which to handle white-space |
| * @param firstTextNode the node at which to start |
| */ |
| public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode) { |
| if (fo.getNameId() == Constants.FO_BLOCK) { |
| this.currentBlock = (Block) fo; |
| this.linefeedTreatment = currentBlock.getLinefeedTreatment(); |
| this.whiteSpaceCollapse = currentBlock.getWhitespaceCollapse(); |
| this.whiteSpaceTreatment = currentBlock.getWhitespaceTreatment(); |
| } else if (fo.getNameId() == Constants.FO_TITLE |
| || fo.getNameId() == Constants.FO_BOOKMARK_TITLE) { |
| /* Two special types of FO that can contain #PCDATA |
| * set properties to their initial values |
| */ |
| this.linefeedTreatment = Constants.EN_TREAT_AS_SPACE; |
| this.whiteSpaceCollapse = Constants.EN_TRUE; |
| this.whiteSpaceTreatment = Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED; |
| } |
| currentFO = fo; |
| if (firstTextNode == null) { |
| //nothing to do but initialize related properties |
| return; |
| } |
| charIter = new RecursiveCharIterator(fo, firstTextNode); |
| inWhiteSpace = false; |
| int textNodeIndex = -1; |
| if (currentFO == currentBlock) { |
| textNodeIndex = fo.childNodes.indexOf(firstTextNode); |
| afterLinefeed = ((textNodeIndex == 0) |
| || (textNodeIndex > 0 |
| && ((FONode) fo.childNodes.get(textNodeIndex - 1)) |
| .getNameId() == Constants.FO_BLOCK)); |
| } |
| endOfBlock = (nextChild == null && currentFO == currentBlock); |
| if (nextChild != null) { |
| int nextChildId = nextChild.getNameId(); |
| nextChildIsBlockLevel = (nextChildId == Constants.FO_BLOCK |
| || nextChildId == Constants.FO_TABLE_AND_CAPTION |
| || nextChildId == Constants.FO_TABLE |
| || nextChildId == Constants.FO_LIST_BLOCK |
| || nextChildId == Constants.FO_BLOCK_CONTAINER); |
| } else { |
| nextChildIsBlockLevel = false; |
| } |
| handleWhiteSpace(); |
| if (currentFO == currentBlock |
| && pendingInlines != null |
| && !pendingInlines.isEmpty()) { |
| /* current FO is a block, and has pending inlines */ |
| if (endOfBlock || nextChildIsBlockLevel) { |
| if (nonWhiteSpaceCount == 0) { |
| /* handle white-space for all pending inlines*/ |
| PendingInline p; |
| for (int i = pendingInlines.size(); --i >= 0;) { |
| p = (PendingInline) pendingInlines.get(i); |
| charIter = (RecursiveCharIterator) p.firstTrailingWhiteSpace; |
| handleWhiteSpace(); |
| pendingInlines.remove(p); |
| } |
| } else { |
| /* there is non-white-space text between the pending |
| * inline(s) and the end of the block; |
| * clear list of pending inlines */ |
| pendingInlines.clear(); |
| } |
| } |
| } |
| if (currentFO != currentBlock && nextChild == null) { |
| /* current FO is not a block, and is about to end */ |
| if (nonWhiteSpaceCount > 0 && pendingInlines != null) { |
| /* there is non-white-space text between the pending |
| * inline(s) and the end of the non-block node; |
| * clear list of pending inlines */ |
| pendingInlines.clear(); |
| } |
| if (inWhiteSpace) { |
| /* means there is at least one trailing space in the |
| inline FO that is about to end */ |
| addPendingInline(fo); |
| } |
| } |
| } |
| |
| /** |
| * Handle white-space for the fo that is passed in, starting at |
| * firstTextNode (when a nested FO is encountered) |
| * @param fo the FO for which to handle white-space |
| * @param firstTextNode the node at which to start |
| * @param nextChild the child-node that will be added to the list after |
| * the last text-node |
| */ |
| public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode, FONode nextChild) { |
| this.nextChild = nextChild; |
| handleWhiteSpace(fo, firstTextNode); |
| this.nextChild = null; |
| } |
| |
| private void handleWhiteSpace() { |
| |
| EOLchecker lfCheck = new EOLchecker(charIter); |
| |
| nonWhiteSpaceCount = 0; |
| |
| while (charIter.hasNext()) { |
| if (!inWhiteSpace) { |
| firstWhiteSpaceInSeq = charIter.mark(); |
| } |
| char currentChar = charIter.nextChar(); |
| int currentCharClass = CharUtilities.classOf(currentChar); |
| if (currentCharClass == CharUtilities.LINEFEED |
| && linefeedTreatment == Constants.EN_TREAT_AS_SPACE) { |
| // if we have a linefeed and it is supposed to be treated |
| // like a space, that's what we do and continue |
| currentChar = '\u0020'; |
| charIter.replaceChar('\u0020'); |
| currentCharClass = CharUtilities.classOf(currentChar); |
| } |
| switch (CharUtilities.classOf(currentChar)) { |
| case CharUtilities.XMLWHITESPACE: |
| // Some kind of whitespace character, except linefeed. |
| if (inWhiteSpace && whiteSpaceCollapse == Constants.EN_TRUE) { |
| // We are in a run of whitespace and should collapse |
| // Just delete the char |
| charIter.remove(); |
| } else { |
| // Do the white space treatment here |
| boolean bIgnore = false; |
| |
| switch (whiteSpaceTreatment) { |
| case Constants.EN_IGNORE: |
| bIgnore = true; |
| break; |
| case Constants.EN_IGNORE_IF_BEFORE_LINEFEED: |
| bIgnore = lfCheck.beforeLinefeed(); |
| break; |
| case Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED: |
| bIgnore = afterLinefeed |
| || lfCheck.beforeLinefeed(); |
| break; |
| case Constants.EN_IGNORE_IF_AFTER_LINEFEED: |
| bIgnore = afterLinefeed; |
| break; |
| case Constants.EN_PRESERVE: |
| // nothing to do now, replacement takes place later |
| break; |
| default: |
| //nop |
| } |
| // Handle ignore and replacement |
| if (bIgnore) { |
| charIter.remove(); |
| } else { |
| // this is to retain a single space between words |
| inWhiteSpace = true; |
| if (currentChar != '\u0020') { |
| charIter.replaceChar('\u0020'); |
| } |
| } |
| } |
| break; |
| |
| case CharUtilities.LINEFEED: |
| // A linefeed |
| switch (linefeedTreatment) { |
| case Constants.EN_IGNORE: |
| charIter.remove(); |
| break; |
| case Constants.EN_TREAT_AS_ZERO_WIDTH_SPACE: |
| charIter.replaceChar(CharUtilities.ZERO_WIDTH_SPACE); |
| inWhiteSpace = false; |
| break; |
| case Constants.EN_PRESERVE: |
| lfCheck.reset(); |
| inWhiteSpace = false; |
| afterLinefeed = true; // for following whitespace |
| break; |
| default: |
| //nop |
| } |
| break; |
| |
| case CharUtilities.EOT: |
| // A "boundary" objects such as non-character inline |
| // or nested block object was encountered. (? can't happen) |
| // If any whitespace run in progress, finish it. |
| // FALL THROUGH |
| |
| default: |
| // Any other character |
| inWhiteSpace = false; |
| afterLinefeed = false; |
| nonWhiteSpaceCount++; |
| lfCheck.reset(); |
| break; |
| } |
| } |
| if (discardableFOCharacters != null |
| && !discardableFOCharacters.isEmpty()) { |
| currentFO.childNodes.removeAll(discardableFOCharacters); |
| discardableFOCharacters.clear(); |
| } |
| } |
| |
| private void addPendingInline(FObjMixed fo) { |
| if (pendingInlines == null) { |
| pendingInlines = new java.util.ArrayList(5); |
| } |
| pendingInlines.add(new PendingInline(fo, firstWhiteSpaceInSeq)); |
| } |
| |
| private class EOLchecker { |
| private boolean nextIsEOL = false; |
| private RecursiveCharIterator charIter; |
| |
| EOLchecker(CharIterator charIter) { |
| this.charIter = (RecursiveCharIterator) charIter; |
| } |
| |
| boolean beforeLinefeed() { |
| if (!nextIsEOL) { |
| CharIterator lfIter = charIter.mark(); |
| while (lfIter.hasNext()) { |
| int charClass = CharUtilities.classOf(lfIter.nextChar()); |
| if (charClass == CharUtilities.LINEFEED) { |
| if (linefeedTreatment == Constants.EN_PRESERVE) { |
| nextIsEOL = true; |
| return nextIsEOL; |
| } |
| } else if (charClass != CharUtilities.XMLWHITESPACE) { |
| return nextIsEOL; |
| } |
| } |
| // No more characters == end of text run |
| // means EOL if there either is a nested block to be added, |
| // or if this is the last text node in the current block |
| nextIsEOL = nextChildIsBlockLevel || endOfBlock; |
| } |
| return nextIsEOL; |
| } |
| |
| void reset() { |
| nextIsEOL = false; |
| } |
| } |
| |
| private class PendingInline { |
| protected FObjMixed fo; |
| protected CharIterator firstTrailingWhiteSpace; |
| |
| PendingInline(FObjMixed fo, CharIterator firstTrailingWhiteSpace) { |
| this.fo = fo; |
| this.firstTrailingWhiteSpace = firstTrailingWhiteSpace; |
| } |
| } |
| } |