| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.fonts; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| |
| // CSOFF: AvoidNestedBlocksCheck |
| // CSOFF: NoWhitespaceAfterCheck |
| // CSOFF: WhitespaceAfter |
| // CSOFF: InnerAssignmentCheck |
| // CSOFF: SimplifyBooleanReturnCheck |
| // CSOFF: LineLengthCheck |
| |
| /** |
| * <p>The <code>DevanagariScriptProcessor</code> class implements a script processor for |
| * performing glyph substitution and positioning operations on content associated with the Devanagari script.</p> |
| * @author Glenn Adams |
| */ |
| public class DevanagariScriptProcessor extends IndicScriptProcessor { |
| |
| /** logging instance */ |
| private static final Log log = LogFactory.getLog(DevanagariScriptProcessor.class); // CSOK: ConstantNameCheck |
| |
| DevanagariScriptProcessor ( String script ) { |
| super ( script ); |
| } |
| |
| @Override |
| protected Class<? extends DevanagariSyllabizer> getSyllabizerClass() { |
| return DevanagariSyllabizer.class; |
| } |
| |
| @Override |
| // find rightmost pre-base matra |
| protected int findPreBaseMatra ( GlyphSequence gs ) { |
| int ng = gs.getGlyphCount(); |
| int lk = -1; |
| for ( int i = ng; i > 0; i-- ) { |
| int k = i - 1; |
| if ( containsPreBaseMatra ( gs, k ) ) { |
| lk = k; |
| break; |
| } |
| } |
| return lk; |
| } |
| |
| @Override |
| // find leftmost pre-base matra target, starting from source |
| protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) { |
| int ng = gs.getGlyphCount(); |
| int lk = -1; |
| for ( int i = ( source < ng ) ? source : ng; i > 0; i-- ) { |
| int k = i - 1; |
| if ( containsConsonant ( gs, k ) ) { |
| if ( containsHalfConsonant ( gs, k ) ) { |
| lk = k; |
| } else if ( lk == -1 ) { |
| lk = k; |
| } else { |
| break; |
| } |
| } |
| } |
| return lk; |
| } |
| |
| private static boolean containsPreBaseMatra ( GlyphSequence gs, int k ) { |
| GlyphSequence.CharAssociation a = gs.getAssociation ( k ); |
| int[] ca = gs.getCharacterArray ( false ); |
| for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { |
| if ( isPreM ( ca [ i ] ) ) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private static boolean containsConsonant ( GlyphSequence gs, int k ) { |
| GlyphSequence.CharAssociation a = gs.getAssociation ( k ); |
| int[] ca = gs.getCharacterArray ( false ); |
| for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { |
| if ( isC ( ca [ i ] ) ) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private static boolean containsHalfConsonant ( GlyphSequence gs, int k ) { |
| Boolean half = (Boolean) gs.getAssociation ( k ) . getPredication ( "half" ); |
| return ( half != null ) ? half.booleanValue() : false; |
| } |
| |
| @Override |
| protected int findReph ( GlyphSequence gs ) { |
| int ng = gs.getGlyphCount(); |
| int li = -1; |
| for ( int i = 0; i < ng; i++ ) { |
| if ( containsReph ( gs, i ) ) { |
| li = i; |
| break; |
| } |
| } |
| return li; |
| } |
| |
| @Override |
| protected int findRephTarget ( GlyphSequence gs, int source ) { |
| int ng = gs.getGlyphCount(); |
| int c1 = -1; |
| int c2 = -1; |
| // first candidate target is after first non-half consonant |
| for ( int i = 0; i < ng; i++ ) { |
| if ( ( i != source ) && containsConsonant ( gs, i ) ) { |
| if ( ! containsHalfConsonant ( gs, i ) ) { |
| c1 = i + 1; |
| break; |
| } |
| } |
| } |
| // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark |
| for ( int i = ( c1 >= 0 ) ? c1 : 0; i < ng; i++ ) { |
| if ( containsMatra ( gs, i ) && ! containsPreBaseMatra ( gs, i ) ) { |
| c2 = i + 1; |
| } else if ( containsOtherMark ( gs, i ) ) { |
| c2 = i; |
| break; |
| } |
| } |
| if ( c2 >= 0 ) { |
| return c2; |
| } else if ( c1 >= 0 ) { |
| return c1; |
| } else { |
| return source; |
| } |
| } |
| |
| private static boolean containsReph ( GlyphSequence gs, int k ) { |
| Boolean rphf = (Boolean) gs.getAssociation ( k ) . getPredication ( "rphf" ); |
| return ( rphf != null ) ? rphf.booleanValue() : false; |
| } |
| |
| private static boolean containsMatra ( GlyphSequence gs, int k ) { |
| GlyphSequence.CharAssociation a = gs.getAssociation ( k ); |
| int[] ca = gs.getCharacterArray ( false ); |
| for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { |
| if ( isM ( ca [ i ] ) ) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private static boolean containsOtherMark ( GlyphSequence gs, int k ) { |
| GlyphSequence.CharAssociation a = gs.getAssociation ( k ); |
| int[] ca = gs.getCharacterArray ( false ); |
| for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { |
| switch ( typeOf ( ca [ i ] ) ) { |
| case C_T: // tone (e.g., udatta, anudatta) |
| case C_A: // accent (e.g., acute, grave) |
| case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) |
| return true; |
| default: |
| break; |
| } |
| } |
| return false; |
| } |
| |
| private static class DevanagariSyllabizer extends DefaultSyllabizer { |
| DevanagariSyllabizer ( String script, String language ) { |
| super ( script, language ); |
| } |
| @Override |
| // | C ... |
| protected int findStartOfSyllable ( int[] ca, int s, int e ) { |
| if ( ( s < 0 ) || ( s >= e ) ) { |
| return -1; |
| } else { |
| while ( s < e ) { |
| int c = ca [ s ]; |
| if ( isC ( c ) ) { |
| break; |
| } else { |
| s++; |
| } |
| } |
| return s; |
| } |
| } |
| @Override |
| // D* L? | ... |
| protected int findEndOfSyllable ( int[] ca, int s, int e ) { |
| if ( ( s < 0 ) || ( s >= e ) ) { |
| return -1; |
| } else { |
| int nd = 0; |
| int nl = 0; |
| int i; |
| // consume dead consonants |
| while ( ( i = isDeadConsonant ( ca, s, e ) ) > s ) { |
| s = i; nd++; |
| } |
| // consume zero or one live consonant |
| if ( ( i = isLiveConsonant ( ca, s, e ) ) > s ) { |
| s = i; nl++; |
| } |
| return ( ( nd > 0 ) || ( nl > 0 ) ) ? s : -1; |
| } |
| } |
| // D := ( C N? H )? |
| private int isDeadConsonant ( int[] ca, int s, int e ) { |
| if ( s < 0 ) { |
| return -1; |
| } else { |
| int c, i = 0; |
| int nc = 0, nh = 0; |
| do { |
| // C |
| if ( ( s + i ) < e ) { |
| c = ca [ s + i ]; |
| if ( isC ( c ) ) { |
| i++; |
| nc++; |
| } else { |
| break; |
| } |
| } |
| // N? |
| if ( ( s + i ) < e ) { |
| c = ca [ s + 1 ]; |
| if ( isN ( c ) ) { |
| i++; |
| } |
| } |
| // H |
| if ( ( s + i ) < e ) { |
| c = ca [ s + i ]; |
| if ( isH ( c ) ) { |
| i++; |
| nh++; |
| } else { |
| break; |
| } |
| } |
| } while ( false ); |
| return ( nc > 0 ) && ( nh > 0 ) ? s + i : -1; |
| } |
| } |
| // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) |
| private int isLiveConsonant ( int[] ca, int s, int e ) { |
| if ( s < 0 ) { |
| return -1; |
| } else { |
| int c, i = 0; |
| int nc = 0, nv = 0, nx = 0; |
| do { |
| // C |
| if ( ( s + i ) < e ) { |
| c = ca [ s + i ]; |
| if ( isC ( c ) ) { |
| i++; |
| nc++; |
| } else if ( isV ( c ) ) { |
| i++; |
| nv++; |
| } else { |
| break; |
| } |
| } |
| // N? |
| if ( ( s + i ) < e ) { |
| c = ca [ s + i ]; |
| if ( isN ( c ) ) { |
| i++; |
| } |
| } |
| // X* |
| while ( ( s + i ) < e ) { |
| c = ca [ s + i ]; |
| if ( isX ( c ) ) { |
| i++; |
| nx++; |
| } else { |
| break; |
| } |
| } |
| } while ( false ); |
| // if no X but has H, then ignore C|I |
| if ( nx == 0 ) { |
| if ( ( s + i ) < e ) { |
| c = ca [ s + i ]; |
| if ( isH ( c ) ) { |
| if ( nc > 0 ) { |
| nc--; |
| } else if ( nv > 0 ) { |
| nv--; |
| } |
| } |
| } |
| } |
| return ( ( nc > 0 ) || ( nv > 0 ) ) ? s + i : -1; |
| } |
| } |
| } |
| |
| // devanagari character types |
| static final short C_U = 0; // unassigned |
| static final short C_C = 1; // consonant |
| static final short C_V = 2; // vowel |
| static final short C_M = 3; // vowel sign (matra) |
| static final short C_S = 4; // symbol or sign |
| static final short C_T = 5; // tone mark |
| static final short C_A = 6; // accent mark |
| static final short C_P = 7; // punctuation |
| static final short C_D = 8; // digit |
| static final short C_H = 9; // halant (virama) |
| static final short C_O = 10; // other signs |
| static final short C_N = 0x0100; // nukta(ized) |
| static final short C_R = 0x0200; // reph(ized) |
| static final short C_PRE = 0x0400; // pre-base |
| static final short C_M_TYPE = 0x00FF; // type mask |
| static final short C_M_FLAGS = 0x7F00; // flag mask |
| // devanagari block range |
| static final int ccaStart = 0x0900; // first code point mapped by cca // CSOK: ConstantNameCheck |
| static final int ccaEnd = 0x0980; // last code point + 1 mapped by cca // CSOK: ConstantNameCheck |
| // devanagari character type lookups |
| static final short[] cca = { // CSOK: ConstantNameCheck |
| C_O, // 0x0900 // INVERTED CANDRABINDU |
| C_O, // 0x0901 // CANDRABINDU |
| C_O, // 0x0902 // ANUSVARA |
| C_O, // 0x0903 // VISARGA |
| C_V, // 0x0904 // SHORT A |
| C_V, // 0x0905 // A |
| C_V, // 0x0906 // AA |
| C_V, // 0x0907 // I |
| C_V, // 0x0908 // II |
| C_V, // 0x0909 // U |
| C_V, // 0x090A // UU |
| C_V, // 0x090B // VOCALIC R |
| C_V, // 0x090C // VOCALIC L |
| C_V, // 0x090D // CANDRA E |
| C_V, // 0x090E // SHORT E |
| C_V, // 0x090F // E |
| C_V, // 0x0910 // AI |
| C_V, // 0x0911 // CANDRA O |
| C_V, // 0x0912 // SHORT O |
| C_V, // 0x0913 // O |
| C_V, // 0x0914 // AU |
| C_C, // 0x0915 // KA |
| C_C, // 0x0916 // KHA |
| C_C, // 0x0917 // GA |
| C_C, // 0x0918 // GHA |
| C_C, // 0x0919 // NGA |
| C_C, // 0x091A // CA |
| C_C, // 0x091B // CHA |
| C_C, // 0x091C // JA |
| C_C, // 0x091D // JHA |
| C_C, // 0x091E // NYA |
| C_C, // 0x091F // TTA |
| C_C, // 0x0920 // TTHA |
| C_C, // 0x0921 // DDA |
| C_C, // 0x0922 // DDHA |
| C_C, // 0x0923 // NNA |
| C_C, // 0x0924 // TA |
| C_C, // 0x0925 // THA |
| C_C, // 0x0926 // DA |
| C_C, // 0x0927 // DHA |
| C_C, // 0x0928 // NA |
| C_C, // 0x0929 // NNNA |
| C_C, // 0x092A // PA |
| C_C, // 0x092B // PHA |
| C_C, // 0x092C // BA |
| C_C, // 0x092D // BHA |
| C_C, // 0x092E // MA |
| C_C, // 0x092F // YA |
| C_C|C_R, // 0x0930 // RA // CSOK: WhitespaceAround |
| C_C|C_R|C_N, // 0x0931 // RRA = 0930+093C // CSOK: WhitespaceAround |
| C_C, // 0x0932 // LA |
| C_C, // 0x0933 // LLA |
| C_C, // 0x0934 // LLLA |
| C_C, // 0x0935 // VA |
| C_C, // 0x0936 // SHA |
| C_C, // 0x0937 // SSA |
| C_C, // 0x0938 // SA |
| C_C, // 0x0939 // HA |
| C_M, // 0x093A // OE (KASHMIRI) |
| C_M, // 0x093B // OOE (KASHMIRI) |
| C_N, // 0x093C // NUKTA |
| C_S, // 0x093D // AVAGRAHA |
| C_M, // 0x093E // AA |
| C_M|C_PRE, // 0x093F // I // CSOK: WhitespaceAround |
| C_M, // 0x0940 // II |
| C_M, // 0x0941 // U |
| C_M, // 0x0942 // UU |
| C_M, // 0x0943 // VOCALIC R |
| C_M, // 0x0944 // VOCALIC RR |
| C_M, // 0x0945 // CANDRA E |
| C_M, // 0x0946 // SHORT E |
| C_M, // 0x0947 // E |
| C_M, // 0x0948 // AI |
| C_M, // 0x0949 // CANDRA O |
| C_M, // 0x094A // SHORT O |
| C_M, // 0x094B // O |
| C_M, // 0x094C // AU |
| C_H, // 0x094D // VIRAMA (HALANT) |
| C_M, // 0x094E // PRISHTHAMATRA E |
| C_M, // 0x094F // AW |
| C_S, // 0x0950 // OM |
| C_T, // 0x0951 // UDATTA |
| C_T, // 0x0952 // ANUDATTA |
| C_A, // 0x0953 // GRAVE |
| C_A, // 0x0954 // ACUTE |
| C_M, // 0x0955 // CANDRA LONG E |
| C_M, // 0x0956 // UE |
| C_M, // 0x0957 // UUE |
| C_C|C_N, // 0x0958 // QA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x0959 // KHHA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x095A // GHHA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x095B // ZA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x095C // DDDHA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x095D // RHA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x095E // FA // CSOK: WhitespaceAround |
| C_C|C_N, // 0x095F // YYA // CSOK: WhitespaceAround |
| C_V, // 0x0960 // VOCALIC RR |
| C_V, // 0x0961 // VOCALIC LL |
| C_M, // 0x0962 // VOCALIC RR |
| C_M, // 0x0963 // VOCALIC LL |
| C_P, // 0x0964 // DANDA |
| C_P, // 0x0965 // DOUBLE DANDA |
| C_D, // 0x0966 // ZERO |
| C_D, // 0x0967 // ONE |
| C_D, // 0x0968 // TWO |
| C_D, // 0x0969 // THREE |
| C_D, // 0x096A // FOUR |
| C_D, // 0x096B // FIVE |
| C_D, // 0x096C // SIX |
| C_D, // 0x096D // SEVEN |
| C_D, // 0x096E // EIGHT |
| C_D, // 0x096F // NINE |
| C_S, // 0x0970 // ABBREVIATION SIGN |
| C_S, // 0x0971 // HIGH SPACING DOT |
| C_V, // 0x0972 // CANDRA A (MARATHI) |
| C_V, // 0x0973 // OE (KASHMIRI) |
| C_V, // 0x0974 // OOE (KASHMIRI) |
| C_V, // 0x0975 // AW (KASHMIRI) |
| C_V, // 0x0976 // UE (KASHMIRI) |
| C_V, // 0x0977 // UUE (KASHMIRI) |
| C_U, // 0x0978 // UNASSIGNED |
| C_C, // 0x0979 // ZHA |
| C_C, // 0x097A // HEAVY YA |
| C_C, // 0x097B // GGAA (SINDHI) |
| C_C, // 0x097C // JJA (SINDHI) |
| C_C, // 0x097D // GLOTTAL STOP (LIMBU) |
| C_C, // 0x097E // DDDA (SINDHI) |
| C_C // 0x097F // BBA (SINDHI) |
| }; |
| static int typeOf(int c) { |
| if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { |
| return cca [ c - ccaStart ] & C_M_TYPE; |
| } else { |
| return C_U; |
| } |
| } |
| static boolean isType(int c, int t) { |
| return typeOf ( c ) == t; |
| } |
| static boolean hasFlag(int c, int f) { |
| if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { |
| return ( cca [ c - ccaStart ] & f ) == f; |
| } else { |
| return false; |
| } |
| } |
| static boolean isC(int c) { |
| return isType(c,C_C); |
| } |
| static boolean isR(int c) { |
| return isType(c,C_C) && hasR(c); |
| } |
| static boolean isV(int c) { |
| return isType(c,C_V); |
| } |
| static boolean isN(int c) { |
| return c == 0x093C; |
| } |
| static boolean isH(int c) { |
| return c == 0x094D; |
| } |
| static boolean isM(int c) { |
| return isType(c,C_M); |
| } |
| static boolean isPreM(int c) { |
| return isType(c,C_M) && hasFlag(c,C_PRE); |
| } |
| static boolean isX(int c) { |
| switch ( typeOf ( c ) ) { |
| case C_M: // matra (combining vowel) |
| case C_A: // accent mark |
| case C_T: // tone mark |
| case C_O: // other (modifying) mark |
| return true; |
| default: |
| return false; |
| } |
| } |
| static boolean hasR(int c) { |
| return hasFlag(c,C_R); |
| } |
| static boolean hasN(int c) { |
| return hasFlag(c,C_N); |
| } |
| |
| } |