| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.complexscripts.bidi; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| |
| import org.apache.fop.traits.Direction; |
| import org.apache.fop.util.CharUtilities; |
| |
| // CSOFF: LineLengthCheck |
| |
| /** |
| * <p>The <code>UnicodeBidiAlgorithm</code> class implements functionality prescribed by |
| * the Unicode Bidirectional Algorithm, Unicode Standard Annex #9.</p> |
| * |
| * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p> |
| */ |
| public final class UnicodeBidiAlgorithm implements BidiConstants { |
| |
/**
 * Logging instance; used by this class for debug-level tracing of run
 * resolution and for dumping resolved levels.
 */
private static final Log log = LogFactory.getLog(UnicodeBidiAlgorithm.class);
| |
// Private constructor: this final class only exposes static methods and is
// not meant to be instantiated.
private UnicodeBidiAlgorithm() {
}
| |
| /** |
| * Resolve the directionality levels of each character in a character seqeunce. |
| * If some character is encoded in the character sequence as a Unicode Surrogate Pair, |
| * then the directionality level of each of the two members of the pair will be identical. |
| * @return null if bidirectional processing is not required; otherwise, returns an array |
| * of integers, where each integer corresponds to exactly one UTF-16 |
| * encoding element present in the input character sequence, and where each integer denotes |
| * the directionality level of the corresponding encoding element |
| * @param cs input character sequence representing a UTF-16 encoded string |
| * @param defaultLevel the default paragraph level, which must be zero (LR) or one (RL) |
| */ |
| public static int[] resolveLevels(CharSequence cs, Direction defaultLevel) { |
| int[] chars = new int [ cs.length() ]; |
| if (convertToScalar(cs, chars) || (defaultLevel == Direction.RL)) { |
| return resolveLevels(chars, (defaultLevel == Direction.RL) ? 1 : 0, new int [ chars.length ]); |
| } else { |
| return null; |
| } |
| } |
| |
| /** |
| * Resolve the directionality levels of each character in a character seqeunce. |
| * @return null if bidirectional processing is not required; otherwise, returns an array |
| * of integers, where each integer corresponds to exactly one UTF-16 |
| * encoding element present in the input character sequence, and where each integer denotes |
| * the directionality level of the corresponding encoding element |
| * @param chars array of input characters represented as unicode scalar values |
| * @param defaultLevel the default paragraph level, which must be zero (LR) or one (RL) |
| * @param levels array to receive levels, one for each character in chars array |
| */ |
| public static int[] resolveLevels(int[] chars, int defaultLevel, int[] levels) { |
| return resolveLevels(chars, getClasses(chars), defaultLevel, levels, false); |
| } |
| |
| /** |
| * Resolve the directionality levels of each character in a character seqeunce. |
| * @return null if bidirectional processing is not required; otherwise, returns an array |
| * of integers, where each integer corresponds to exactly one UTF-16 |
| * encoding element present in the input character sequence, and where each integer denotes |
| * the directionality level of the corresponding encoding element |
| * @param chars array of input characters represented as unicode scalar values |
| * @param classes array containing one bidi class per character in chars array |
| * @param defaultLevel the default paragraph level, which must be zero (LR) or one (RL) |
| * @param levels array to receive levels, one for each character in chars array |
| * @param useRuleL1 true if rule L1 should be used |
| */ |
| public static int[] resolveLevels(int[] chars, int[] classes, int defaultLevel, int[] levels, boolean useRuleL1) { |
| int[] ica = classes; |
| int[] wca = copySequence(ica); |
| int[] ea = new int [ levels.length ]; |
| resolveExplicit(wca, defaultLevel, ea); |
| resolveRuns(wca, defaultLevel, ea, levelsFromEmbeddings(ea, levels)); |
| if (useRuleL1) { |
| resolveSeparators(ica, wca, defaultLevel, levels); |
| } |
| dump("RL: CC(" + ((chars != null) ? chars.length : -1) + ")", chars, classes, defaultLevel, levels); |
| return levels; |
| } |
| |
| private static int[] copySequence(int[] ta) { |
| int[] na = new int [ ta.length ]; |
| System.arraycopy(ta, 0, na, 0, na.length); |
| return na; |
| } |
| |
| private static void resolveExplicit(int[] wca, int defaultLevel, int[] ea) { |
| int[] es = new int [ MAX_LEVELS ]; /* embeddings stack */ |
| int ei = 0; /* embeddings stack index */ |
| int ec = defaultLevel; /* current embedding level */ |
| for (int i = 0, n = wca.length; i < n; i++) { |
| int bc = wca [ i ]; /* bidi class of current char */ |
| int el; /* embedding level to assign to current char */ |
| switch (bc) { |
| case LRE: // start left-to-right embedding |
| case RLE: // start right-to-left embedding |
| case LRO: // start left-to-right override |
| case RLO: // start right-to-left override |
| int en; /* new embedding level */ |
| if ((bc == RLE) || (bc == RLO)) { |
| en = ((ec & ~OVERRIDE) + 1) | 1; |
| } else { |
| en = ((ec & ~OVERRIDE) + 2) & ~1; |
| } |
| if (en < (MAX_LEVELS + 1)) { |
| es [ ei++ ] = ec; |
| if ((bc == LRO) || (bc == RLO)) { |
| ec = en | OVERRIDE; |
| } else { |
| ec = en & ~OVERRIDE; |
| } |
| } else { |
| // max levels exceeded, so don't change level or override |
| } |
| el = ec; |
| break; |
| case PDF: // pop directional formatting |
| el = ec; |
| if (ei > 0) { |
| ec = es [ --ei ]; |
| } else { |
| // ignore isolated PDF |
| } |
| break; |
| case B: // paragraph separator |
| el = ec = defaultLevel; |
| ei = 0; |
| break; |
| default: |
| el = ec; |
| break; |
| } |
| switch (bc) { |
| case BN: |
| break; |
| case LRE: case RLE: case LRO: case RLO: case PDF: |
| wca [ i ] = BN; |
| break; |
| default: |
| if ((el & OVERRIDE) != 0) { |
| wca [ i ] = directionOfLevel(el); |
| } |
| break; |
| } |
| ea [ i ] = el; |
| } |
| } |
| |
| private static int directionOfLevel(int level) { |
| return ((level & 1) != 0) ? R : L; |
| } |
| |
| private static int levelOfEmbedding(int embedding) { |
| return embedding & ~OVERRIDE; |
| } |
| |
| private static int[] levelsFromEmbeddings(int[] ea, int[] la) { |
| assert ea != null; |
| assert la != null; |
| assert la.length == ea.length; |
| for (int i = 0, n = la.length; i < n; i++) { |
| la [ i ] = levelOfEmbedding(ea [ i ]); |
| } |
| return la; |
| } |
| |
| private static void resolveRuns(int[] wca, int defaultLevel, int[] ea, int[] la) { |
| if (la.length != wca.length) { |
| throw new IllegalArgumentException("levels sequence length must match classes sequence length"); |
| } else if (la.length != ea.length) { |
| throw new IllegalArgumentException("levels sequence length must match embeddings sequence length"); |
| } else { |
| for (int i = 0, n = ea.length, lPrev = defaultLevel; i < n; ) { |
| int s = i; |
| int e = s; |
| int l = findNextNonRetainedFormattingLevel(wca, ea, s, lPrev); |
| while (e < n) { |
| if (la [ e ] != l) { |
| if (startsWithRetainedFormattingRun(wca, ea, e)) { |
| e += getLevelRunLength(ea, e); |
| } else { |
| break; |
| } |
| } else { |
| e++; |
| } |
| } |
| lPrev = resolveRun(wca, defaultLevel, ea, la, s, e, l, lPrev); |
| i = e; |
| } |
| } |
| } |
| |
| private static int findNextNonRetainedFormattingLevel(int[] wca, int[] ea, int start, int lPrev) { |
| int s = start; |
| int e = wca.length; |
| while (s < e) { |
| if (startsWithRetainedFormattingRun(wca, ea, s)) { |
| s += getLevelRunLength(ea, s); |
| } else { |
| break; |
| } |
| } |
| if (s < e) { |
| return levelOfEmbedding(ea [ s ]); |
| } else { |
| return lPrev; |
| } |
| } |
| |
| private static int getLevelRunLength(int[] ea, int start) { |
| assert start < ea.length; |
| int nl = 0; |
| for (int s = start, e = ea.length, l0 = levelOfEmbedding(ea [ start ]); s < e; s++) { |
| if (levelOfEmbedding(ea [ s ]) == l0) { |
| nl++; |
| } else { |
| break; |
| } |
| } |
| return nl; |
| } |
| |
| private static boolean startsWithRetainedFormattingRun(int[] wca, int[] ea, int start) { |
| int nl = getLevelRunLength(ea, start); |
| if (nl > 0) { |
| int nc = getRetainedFormattingRunLength(wca, start); |
| return (nc >= nl); |
| } else { |
| return false; |
| } |
| } |
| |
| private static int getRetainedFormattingRunLength(int[] wca, int start) { |
| assert start < wca.length; |
| int nc = 0; |
| for (int s = start, e = wca.length; s < e; s++) { |
| if (wca [ s ] == BidiConstants.BN) { |
| nc++; |
| } else { |
| break; |
| } |
| } |
| return nc; |
| } |
| |
| private static int resolveRun(int[] wca, int defaultLevel, int[] ea, int[] la, int start, int end, int level, int levelPrev) { |
| |
| // determine start of run direction |
| int sor = directionOfLevel(max(levelPrev, level)); |
| |
| // determine end of run direction |
| int le = -1; |
| if (end == wca.length) { |
| le = max(level, defaultLevel); |
| } else { |
| for (int i = end; i < wca.length; i++) { |
| if (wca [ i ] != BidiConstants.BN) { |
| le = max(level, la [ i ]); |
| break; |
| } |
| } |
| if (le < 0) { |
| le = max(level, defaultLevel); |
| } |
| } |
| int eor = directionOfLevel(le); |
| |
| if (log.isDebugEnabled()) { |
| log.debug("BR[" + padLeft(start, 3) + "," + padLeft(end, 3) + "] :" + padLeft(level, 2) + ": SOR(" + getClassName(sor) + "), EOR(" + getClassName(eor) + ")"); |
| } |
| |
| resolveWeak(wca, defaultLevel, ea, la, start, end, level, sor, eor); |
| resolveNeutrals(wca, defaultLevel, ea, la, start, end, level, sor, eor); |
| resolveImplicit(wca, defaultLevel, ea, la, start, end, level, sor, eor); |
| |
| // if this run is all retained formatting, then return prior level, otherwise this run's level |
| return isRetainedFormatting(wca, start, end) ? levelPrev : level; |
| } |
| |
| private static void resolveWeak(int[] wca, int defaultLevel, int[] ea, int[] la, int start, int end, int level, int sor, int eor) { |
| |
| // W1 - X BN* NSM -> X BN* X |
| for (int i = start, n = end, bcPrev = sor; i < n; i++) { |
| int bc = wca [ i ]; |
| if (bc == NSM) { |
| wca [ i ] = bcPrev; |
| } else if (bc != BN) { |
| bcPrev = bc; |
| } |
| } |
| |
| // W2 - AL ... EN -> AL ... AN |
| for (int i = start, n = end, bcPrev = sor; i < n; i++) { |
| int bc = wca [ i ]; |
| if (bc == EN) { |
| if (bcPrev == AL) { |
| wca [ i ] = AN; |
| } |
| } else if (isStrong(bc)) { |
| bcPrev = bc; |
| } |
| } |
| |
| // W3 - AL -> R |
| for (int i = start, n = end; i < n; i++) { |
| int bc = wca [ i ]; |
| if (bc == AL) { |
| wca [ i ] = R; |
| } |
| } |
| |
| // W4 - EN BN* ES BN* EN -> EN BN* EN BN* EN; XN BN* CS BN* XN -> XN BN* XN BN* XN |
| for (int i = start, n = end, bcPrev = sor; i < n; i++) { |
| int bc = wca [ i ]; |
| if (bc == ES) { |
| int bcNext = eor; |
| for (int j = i + 1; j < n; j++) { |
| if ((bc = wca [ j ]) != BN) { |
| bcNext = bc; |
| break; |
| } |
| } |
| if ((bcPrev == EN) && (bcNext == EN)) { |
| wca [ i ] = EN; |
| } |
| } else if (bc == CS) { |
| int bcNext = eor; |
| for (int j = i + 1; j < n; j++) { |
| if ((bc = wca [ j ]) != BN) { |
| bcNext = bc; |
| break; |
| } |
| } |
| if ((bcPrev == EN) && (bcNext == EN)) { |
| wca [ i ] = EN; |
| } else if ((bcPrev == AN) && (bcNext == AN)) { |
| wca [ i ] = AN; |
| } |
| } |
| if (bc != BN) { |
| bcPrev = bc; |
| } |
| } |
| |
| // W5 - EN (ET|BN)* -> EN (EN|BN)*; (ET|BN)* EN -> (EN|BN)* EN |
| for (int i = start, n = end, bcPrev = sor; i < n; i++) { |
| int bc = wca [ i ]; |
| if (bc == ET) { |
| int bcNext = eor; |
| for (int j = i + 1; j < n; j++) { |
| bc = wca [ j ]; |
| if ((bc != BN) && (bc != ET)) { |
| bcNext = bc; |
| break; |
| } |
| } |
| if ((bcPrev == EN) || (bcNext == EN)) { |
| wca [ i ] = EN; |
| } |
| } else if (bc != BN) { |
| bcPrev = bc; |
| } |
| } |
| |
| // W6 - BN* (ET|ES|CS) BN* -> ON* ON ON* |
| for (int i = start, n = end; i < n; i++) { |
| int bc = wca [ i ]; |
| if ((bc == ET) || (bc == ES) || (bc == CS)) { |
| wca [ i ] = ON; |
| resolveAdjacentBoundaryNeutrals(wca, start, end, i, ON); |
| } |
| } |
| |
| // W7 - L ... EN -> L ... L |
| for (int i = start, n = end, bcPrev = sor; i < n; i++) { |
| int bc = wca [ i ]; |
| if (bc == EN) { |
| if (bcPrev == L) { |
| wca [ i ] = L; |
| } |
| } else if ((bc == L) || (bc == R)) { |
| bcPrev = bc; |
| } |
| } |
| |
| } |
| |
| private static void resolveNeutrals(int[] wca, int defaultLevel, int[] ea, int[] la, int start, int end, int level, int sor, int eor) { |
| |
| // N1 - (L|R) N+ (L|R) -> L L+ L | R R+ R; (AN|EN) N+ R -> (AN|EN) R+ R; R N+ (AN|EN) -> R R+ (AN|EN) |
| for (int i = start, n = end, bcPrev = sor; i < n; i++) { |
| int bc = wca [ i ]; |
| if (isNeutral(bc)) { |
| int bcNext = eor; |
| for (int j = i + 1; j < n; j++) { |
| bc = wca [ j ]; |
| if ((bc == L) || (bc == R)) { |
| bcNext = bc; |
| break; |
| } else if ((bc == AN) || (bc == EN)) { |
| bcNext = R; |
| break; |
| } else if (isNeutral(bc)) { |
| continue; |
| } else if (isRetainedFormatting(bc)) { |
| continue; |
| } else { |
| break; |
| } |
| } |
| if (bcPrev == bcNext) { |
| wca [ i ] = bcPrev; |
| resolveAdjacentBoundaryNeutrals(wca, start, end, i, bcPrev); |
| } |
| } else if ((bc == L) || (bc == R)) { |
| bcPrev = bc; |
| } else if ((bc == AN) || (bc == EN)) { |
| bcPrev = R; |
| } |
| } |
| |
| // N2 - N -> embedding level |
| for (int i = start, n = end; i < n; i++) { |
| int bc = wca [ i ]; |
| if (isNeutral(bc)) { |
| int bcEmbedding = directionOfLevel(levelOfEmbedding(ea [ i ])); |
| wca [ i ] = bcEmbedding; |
| resolveAdjacentBoundaryNeutrals(wca, start, end, i, bcEmbedding); |
| } |
| } |
| |
| } |
| |
| private static void resolveAdjacentBoundaryNeutrals(int[] wca, int start, int end, int index, int bcNew) { |
| if ((index < start) || (index >= end)) { |
| throw new IllegalArgumentException(); |
| } else { |
| for (int i = index - 1; i >= start; i--) { |
| int bc = wca [ i ]; |
| if (bc == BN) { |
| wca [ i ] = bcNew; |
| } else { |
| break; |
| } |
| } |
| for (int i = index + 1; i < end; i++) { |
| int bc = wca [ i ]; |
| if (bc == BN) { |
| wca [ i ] = bcNew; |
| } else { |
| break; |
| } |
| } |
| } |
| } |
| |
| private static void resolveImplicit(int[] wca, int defaultLevel, int[] ea, int[] la, int start, int end, int level, int sor, int eor) { |
| for (int i = start, n = end; i < n; i++) { |
| int bc = wca [ i ]; // bidi class |
| int el = la [ i ]; // embedding level |
| int ed = 0; // embedding level delta |
| if ((el & 1) == 0) { // even |
| if (bc == R) { |
| ed = 1; |
| } else if (bc == AN) { |
| ed = 2; |
| } else if (bc == EN) { |
| ed = 2; |
| } |
| } else { // odd |
| if (bc == L) { |
| ed = 1; |
| } else if (bc == EN) { |
| ed = 1; |
| } else if (bc == AN) { |
| ed = 1; |
| } |
| } |
| la [ i ] = el + ed; |
| } |
| } |
| |
| /** |
| * Resolve separators and boundary neutral levels to account for UAX#9 3.4 L1 while taking into |
| * account retention of formatting codes (5.2). |
| * @param ica original input class array (sequence) |
| * @param wca working copy of original intput class array (sequence), as modified by prior steps |
| * @param dl default paragraph level |
| * @param la array of output levels to be adjusted, as produced by bidi algorithm |
| */ |
| private static void resolveSeparators(int[] ica, int[] wca, int dl, int[] la) { |
| // steps (1) through (3) |
| for (int i = 0, n = ica.length; i < n; i++) { |
| int ic = ica[i]; |
| if ((ic == BidiConstants.S) || (ic == BidiConstants.B)) { |
| la[i] = dl; |
| for (int k = i - 1; k >= 0; k--) { |
| int pc = ica[k]; |
| if (isRetainedFormatting(pc)) { |
| continue; |
| } else if (pc == BidiConstants.WS) { |
| la[k] = dl; |
| } else { |
| break; |
| } |
| } |
| } |
| } |
| // step (4) - consider end of input sequence to be end of line, but skip any trailing boundary neutrals and retained formatting codes |
| for (int i = ica.length; i > 0; i--) { |
| int k = i - 1; |
| int ic = ica[k]; |
| if (isRetainedFormatting(ic)) { |
| continue; |
| } else if (ic == BidiConstants.WS) { |
| la[k] = dl; |
| } else { |
| break; |
| } |
| } |
| // step (5) - per section 5.2 |
| for (int i = 0, n = ica.length; i < n; i++) { |
| int ic = ica[i]; |
| if (isRetainedFormatting(ic)) { |
| if (i == 0) { |
| la[i] = dl; |
| } else { |
| la[i] = la [ i - 1 ]; |
| } |
| } |
| } |
| } |
| |
| private static boolean isStrong(int bc) { |
| switch (bc) { |
| case L: |
| case R: |
| case AL: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| private static boolean isNeutral(int bc) { |
| switch (bc) { |
| case WS: |
| case ON: |
| case S: |
| case B: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| private static boolean isRetainedFormatting(int bc) { |
| switch (bc) { |
| case LRE: |
| case LRO: |
| case RLE: |
| case RLO: |
| case PDF: |
| case BN: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| private static boolean isRetainedFormatting(int[] ca, int s, int e) { |
| for (int i = s; i < e; i++) { |
| if (!isRetainedFormatting(ca[i])) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static int max(int x, int y) { |
| if (x > y) { |
| return x; |
| } else { |
| return y; |
| } |
| } |
| |
| private static int[] getClasses(int[] chars) { |
| int[] classes = new int [ chars.length ]; |
| int bc; |
| for (int i = 0, n = chars.length; i < n; i++) { |
| int ch = chars [ i ]; |
| if (ch >= 0) { |
| bc = BidiClass.getBidiClass(chars [ i ]); |
| } else { |
| bc = SURROGATE; |
| } |
| classes [ i ] = bc; |
| } |
| return classes; |
| } |
| |
| /** |
| * Convert character sequence (a UTF-16 encoded string) to an array of unicode scalar values |
| * expressed as integers. If a valid UTF-16 surrogate pair is encountered, it is converted to |
| * two integers, the first being the equivalent unicode scalar value, and the second being |
| * negative one (-1). This special mechanism is used to track the use of surrogate pairs while |
| * working with unicode scalar values, and permits maintaining indices that apply both to the |
| * input UTF-16 and out scalar value sequences. |
| * @return a boolean indicating that content is present that triggers bidirectional processing |
| * @param cs a UTF-16 encoded character sequence |
| * @param chars an integer array to accept the converted scalar values, where the length of the |
| * array must be the same as the length of the input character sequence |
| * @throws IllegalArgumentException if the input sequence is not a valid UTF-16 string, e.g., |
| * if it contains an isolated UTF-16 surrogate |
| */ |
| private static boolean convertToScalar(CharSequence cs, int[] chars) throws IllegalArgumentException { |
| boolean triggered = false; |
| if (chars.length != cs.length()) { |
| throw new IllegalArgumentException("characters array length must match input sequence length"); |
| } |
| for (int i = 0, n = chars.length; i < n; ) { |
| int chIn = cs.charAt(i); |
| int chOut; |
| if (chIn < 0xD800) { |
| chOut = chIn; |
| } else if (chIn < 0xDC00) { |
| int chHi = chIn; |
| int chLo; |
| if ((i + 1) < n) { |
| chLo = cs.charAt(i + 1); |
| if ((chLo >= 0xDC00) && (chLo <= 0xDFFF)) { |
| chOut = convertToScalar(chHi, chLo); |
| } else { |
| throw new IllegalArgumentException("isolated high surrogate"); |
| } |
| } else { |
| throw new IllegalArgumentException("truncated surrogate pair"); |
| } |
| } else if (chIn < 0xE000) { |
| throw new IllegalArgumentException("isolated low surrogate"); |
| } else { |
| chOut = chIn; |
| } |
| if (!triggered && triggersBidi(chOut)) { |
| triggered = true; |
| } |
| if ((chOut & 0xFF0000) == 0) { |
| chars [ i++ ] = chOut; |
| } else { |
| chars [ i++ ] = chOut; |
| chars [ i++ ] = -1; |
| } |
| } |
| return triggered; |
| } |
| |
| /** |
| * Convert UTF-16 surrogate pair to unicode scalar valuee. |
| * @return a unicode scalar value |
| * @param chHi high (most significant or first) surrogate |
| * @param chLo low (least significant or second) surrogate |
| * @throws IllegalArgumentException if one of the input surrogates is not valid |
| */ |
| private static int convertToScalar(int chHi, int chLo) { |
| if ((chHi < 0xD800) || (chHi > 0xDBFF)) { |
| throw new IllegalArgumentException("bad high surrogate"); |
| } else if ((chLo < 0xDC00) || (chLo > 0xDFFF)) { |
| throw new IllegalArgumentException("bad low surrogate"); |
| } else { |
| return (((chHi & 0x03FF) << 10) | (chLo & 0x03FF)) + 0x10000; |
| } |
| } |
| |
| /** |
| * Determine of character CH triggers bidirectional processing. Bidirectional |
| * processing is deemed triggerable if CH is a strong right-to-left character, |
| * an arabic letter or number, or is a right-to-left embedding or override |
| * character. |
| * @return true if character triggers bidirectional processing |
| * @param ch a unicode scalar value |
| */ |
| private static boolean triggersBidi(int ch) { |
| switch (BidiClass.getBidiClass(ch)) { |
| case R: |
| case AL: |
| case AN: |
| case RLE: |
| case RLO: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| private static void dump(String header, int[] chars, int[] classes, int defaultLevel, int[] levels) { |
| log.debug(header); |
| log.debug("BD: default level(" + defaultLevel + ")"); |
| StringBuffer sb = new StringBuffer(); |
| if (chars != null) { |
| for (int i = 0, n = chars.length; i < n; i++) { |
| int ch = chars [ i ]; |
| sb.setLength(0); |
| if ((ch > 0x20) && (ch < 0x7F)) { |
| sb.append((char) ch); |
| } else { |
| sb.append(CharUtilities.charToNCRef(ch)); |
| } |
| for (int k = sb.length(); k < 12; k++) { |
| sb.append(' '); |
| } |
| sb.append(": " + padRight(getClassName(classes[i]), 4) + " " + levels[i]); |
| log.debug(sb); |
| } |
| } else { |
| for (int i = 0, n = classes.length; i < n; i++) { |
| sb.setLength(0); |
| for (int k = sb.length(); k < 12; k++) { |
| sb.append(' '); |
| } |
| sb.append(": " + padRight(getClassName(classes[i]), 4) + " " + levels[i]); |
| log.debug(sb); |
| } |
| } |
| } |
| |
| private static String getClassName(int bc) { |
| switch (bc) { |
| case L: // left-to-right |
| return "L"; |
| case LRE: // left-to-right embedding |
| return "LRE"; |
| case LRO: // left-to-right override |
| return "LRO"; |
| case R: // right-to-left |
| return "R"; |
| case AL: // right-to-left arabic |
| return "AL"; |
| case RLE: // right-to-left embedding |
| return "RLE"; |
| case RLO: // right-to-left override |
| return "RLO"; |
| case PDF: // pop directional formatting |
| return "PDF"; |
| case EN: // european number |
| return "EN"; |
| case ES: // european number separator |
| return "ES"; |
| case ET: // european number terminator |
| return "ET"; |
| case AN: // arabic number |
| return "AN"; |
| case CS: // common number separator |
| return "CS"; |
| case NSM: // non-spacing mark |
| return "NSM"; |
| case BN: // boundary neutral |
| return "BN"; |
| case B: // paragraph separator |
| return "B"; |
| case S: // segment separator |
| return "S"; |
| case WS: // whitespace |
| return "WS"; |
| case ON: // other neutrals |
| return "ON"; |
| case SURROGATE: // placeholder for low surrogate |
| return "SUR"; |
| default: |
| return "?"; |
| } |
| } |
| |
| private static String padLeft(int n, int width) { |
| return padLeft(Integer.toString(n), width); |
| } |
| |
| private static String padLeft(String s, int width) { |
| StringBuffer sb = new StringBuffer(); |
| for (int i = s.length(); i < width; i++) { |
| sb.append(' '); |
| } |
| sb.append(s); |
| return sb.toString(); |
| } |
| |
| /* not used yet |
| private static String padRight ( int n, int width ) { |
| return padRight ( Integer.toString ( n ), width ); |
| } |
| */ |
| |
| private static String padRight(String s, int width) { |
| StringBuffer sb = new StringBuffer(s); |
| for (int i = sb.length(); i < width; i++) { |
| sb.append(' '); |
| } |
| return sb.toString(); |
| } |
| |
| } |