| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* $Id$ */ |
| |
| package org.apache.fop.complexscripts.scripts; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| |
| import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable; |
| import org.apache.fop.complexscripts.util.GlyphSequence; |
| |
| // CSOFF: LineLengthCheck |
| |
| /** |
| * <p>The <code>GurmukhiScriptProcessor</code> class implements a script processor for |
| * performing glyph substitution and positioning operations on content associated with the Gurmukhi script.</p> |
| * |
| * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p> |
| */ |
| public class GurmukhiScriptProcessor extends IndicScriptProcessor { |
| |
| /** logging instance */ |
| private static final Log log = LogFactory.getLog(GurmukhiScriptProcessor.class); |
| |
| GurmukhiScriptProcessor(String script) { |
| super(script); |
| } |
| |
| @Override |
| protected Class<? extends GurmukhiSyllabizer> getSyllabizerClass() { |
| return GurmukhiSyllabizer.class; |
| } |
| |
| @Override |
| // find rightmost pre-base matra |
| protected int findPreBaseMatra(GlyphSequence gs) { |
| int ng = gs.getGlyphCount(); |
| int lk = -1; |
| for (int i = ng; i > 0; i--) { |
| int k = i - 1; |
| if (containsPreBaseMatra(gs, k)) { |
| lk = k; |
| break; |
| } |
| } |
| return lk; |
| } |
| |
| @Override |
| // find leftmost pre-base matra target, starting from source |
| protected int findPreBaseMatraTarget(GlyphSequence gs, int source) { |
| int ng = gs.getGlyphCount(); |
| int lk = -1; |
| for (int i = (source < ng) ? source : ng; i > 0; i--) { |
| int k = i - 1; |
| if (containsConsonant(gs, k)) { |
| if (containsHalfConsonant(gs, k)) { |
| lk = k; |
| } else if (lk == -1) { |
| lk = k; |
| } else { |
| break; |
| } |
| } |
| } |
| return lk; |
| } |
| |
| private static boolean containsPreBaseMatra(GlyphSequence gs, int k) { |
| GlyphSequence.CharAssociation a = gs.getAssociation(k); |
| int[] ca = gs.getCharacterArray(false); |
| for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { |
| if (isPreM(ca [ i ])) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private static boolean containsConsonant(GlyphSequence gs, int k) { |
| GlyphSequence.CharAssociation a = gs.getAssociation(k); |
| int[] ca = gs.getCharacterArray(false); |
| for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { |
| if (isC(ca [ i ])) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private static boolean containsHalfConsonant(GlyphSequence gs, int k) { |
| Boolean half = (Boolean) gs.getAssociation(k) .getPredication("half"); |
| return (half != null) ? half.booleanValue() : false; |
| } |
| |
| @Override |
| protected int findReph(GlyphSequence gs) { |
| int ng = gs.getGlyphCount(); |
| int li = -1; |
| for (int i = 0; i < ng; i++) { |
| if (containsReph(gs, i)) { |
| li = i; |
| break; |
| } |
| } |
| return li; |
| } |
| |
| @Override |
| protected int findRephTarget(GlyphSequence gs, int source) { |
| int ng = gs.getGlyphCount(); |
| int c1 = -1; |
| int c2 = -1; |
| // first candidate target is after first non-half consonant |
| for (int i = 0; i < ng; i++) { |
| if ((i != source) && containsConsonant(gs, i)) { |
| if (!containsHalfConsonant(gs, i)) { |
| c1 = i + 1; |
| break; |
| } |
| } |
| } |
| // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark |
| for (int i = (c1 >= 0) ? c1 : 0; i < ng; i++) { |
| if (containsMatra(gs, i) && !containsPreBaseMatra(gs, i)) { |
| c2 = i + 1; |
| } else if (containsOtherMark(gs, i)) { |
| c2 = i; |
| break; |
| } |
| } |
| if (c2 >= 0) { |
| return c2; |
| } else if (c1 >= 0) { |
| return c1; |
| } else { |
| return source; |
| } |
| } |
| |
| private static boolean containsReph(GlyphSequence gs, int k) { |
| Boolean rphf = (Boolean) gs.getAssociation(k) .getPredication("rphf"); |
| return (rphf != null) ? rphf.booleanValue() : false; |
| } |
| |
| private static boolean containsMatra(GlyphSequence gs, int k) { |
| GlyphSequence.CharAssociation a = gs.getAssociation(k); |
| int[] ca = gs.getCharacterArray(false); |
| for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { |
| if (isM(ca [ i ])) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private static boolean containsOtherMark(GlyphSequence gs, int k) { |
| GlyphSequence.CharAssociation a = gs.getAssociation(k); |
| int[] ca = gs.getCharacterArray(false); |
| for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { |
| switch (typeOf(ca [ i ])) { |
| case C_T: // tone (e.g., udatta, anudatta) |
| case C_A: // accent (e.g., acute, grave) |
| case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) |
| return true; |
| default: |
| break; |
| } |
| } |
| return false; |
| } |
| |
| private static class GurmukhiSyllabizer extends DefaultSyllabizer { |
| GurmukhiSyllabizer(String script, String language) { |
| super(script, language); |
| } |
| @Override |
| // | C ... |
| protected int findStartOfSyllable(int[] ca, int s, int e) { |
| if ((s < 0) || (s >= e)) { |
| return -1; |
| } else { |
| while (s < e) { |
| int c = ca [ s ]; |
| if (isC(c)) { |
| break; |
| } else { |
| s++; |
| } |
| } |
| return s; |
| } |
| } |
| @Override |
| // D* L? | ... |
| protected int findEndOfSyllable(int[] ca, int s, int e) { |
| if ((s < 0) || (s >= e)) { |
| return -1; |
| } else { |
| int nd = 0; |
| int nl = 0; |
| int i; |
| // consume dead consonants |
| while ((i = isDeadConsonant(ca, s, e)) > s) { |
| s = i; |
| nd++; |
| } |
| // consume zero or one live consonant |
| if ((i = isLiveConsonant(ca, s, e)) > s) { |
| s = i; |
| nl++; |
| } |
| return ((nd > 0) || (nl > 0)) ? s : -1; |
| } |
| } |
| // D := ( C N? H )? |
| private int isDeadConsonant(int[] ca, int s, int e) { |
| if (s < 0) { |
| return -1; |
| } else { |
| int c; |
| int i = 0; |
| int nc = 0; |
| int nh = 0; |
| do { |
| // C |
| if ((s + i) < e) { |
| c = ca [ s + i ]; |
| if (isC(c)) { |
| i++; |
| nc++; |
| } else { |
| break; |
| } |
| } |
| // N? |
| if ((s + i) < e) { |
| c = ca [ s + 1 ]; |
| if (isN(c)) { |
| i++; |
| } |
| } |
| // H |
| if ((s + i) < e) { |
| c = ca [ s + i ]; |
| if (isH(c)) { |
| i++; |
| nh++; |
| } else { |
| break; |
| } |
| } |
| } while (false); |
| return (nc > 0) && (nh > 0) ? s + i : -1; |
| } |
| } |
| // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) |
| private int isLiveConsonant(int[] ca, int s, int e) { |
| if (s < 0) { |
| return -1; |
| } else { |
| int c; |
| int i = 0; |
| int nc = 0; |
| int nv = 0; |
| int nx = 0; |
| do { |
| // C |
| if ((s + i) < e) { |
| c = ca [ s + i ]; |
| if (isC(c)) { |
| i++; |
| nc++; |
| } else if (isV(c)) { |
| i++; |
| nv++; |
| } else { |
| break; |
| } |
| } |
| // N? |
| if ((s + i) < e) { |
| c = ca [ s + i ]; |
| if (isN(c)) { |
| i++; |
| } |
| } |
| // X* |
| while ((s + i) < e) { |
| c = ca [ s + i ]; |
| if (isX(c)) { |
| i++; |
| nx++; |
| } else { |
| break; |
| } |
| } |
| } while (false); |
| // if no X but has H, then ignore C|I |
| if (nx == 0) { |
| if ((s + i) < e) { |
| c = ca [ s + i ]; |
| if (isH(c)) { |
| if (nc > 0) { |
| nc--; |
| } else if (nv > 0) { |
| nv--; |
| } |
| } |
| } |
| } |
| return ((nc > 0) || (nv > 0)) ? s + i : -1; |
| } |
| } |
| } |
| |
| // gurmukhi character types |
| static final short C_U = 0; // unassigned |
| static final short C_C = 1; // consonant |
| static final short C_V = 2; // vowel |
| static final short C_M = 3; // vowel sign (matra) |
| static final short C_S = 4; // symbol or sign |
| static final short C_T = 5; // tone mark |
| static final short C_A = 6; // accent mark |
| static final short C_P = 7; // punctuation |
| static final short C_D = 8; // digit |
| static final short C_H = 9; // halant (virama) |
| static final short C_O = 10; // other signs |
| static final short C_N = 0x0100; // nukta(ized) |
| static final short C_R = 0x0200; // reph(ized) |
| static final short C_PRE = 0x0400; // pre-base |
| static final short C_M_TYPE = 0x00FF; // type mask |
| static final short C_M_FLAGS = 0x7F00; // flag mask |
| // gurmukhi block range |
| static final int CCA_START = 0x0A00; // first code point mapped by cca |
| static final int CCA_END = 0x0A80; // last code point + 1 mapped by cca |
| // gurmukhi character type lookups |
| static final short[] CCA = { |
| C_U, // 0x0A00 // UNASSIGNED |
| C_O, // 0x0A01 // ADAK BINDI |
| C_O, // 0x0A02 // BINDI |
| C_O, // 0x0A03 // VISARGA |
| C_U, // 0x0A04 // UNASSIGNED |
| C_V, // 0x0A05 // A |
| C_V, // 0x0A06 // AA |
| C_V, // 0x0A07 // I |
| C_V, // 0x0A08 // II |
| C_V, // 0x0A09 // U |
| C_V, // 0x0A0A // UU |
| C_U, // 0x0A0B // UNASSIGNED |
| C_U, // 0x0A0C // UNASSIGNED |
| C_U, // 0x0A0D // UNASSIGNED |
| C_U, // 0x0A0E // UNASSIGNED |
| C_V, // 0x0A0F // E |
| C_V, // 0x0A10 // AI |
| C_U, // 0x0A11 // UNASSIGNED |
| C_U, // 0x0A12 // UNASSIGNED |
| C_V, // 0x0A13 // O |
| C_V, // 0x0A14 // AU |
| C_C, // 0x0A15 // KA |
| C_C, // 0x0A16 // KHA |
| C_C, // 0x0A17 // GA |
| C_C, // 0x0A18 // GHA |
| C_C, // 0x0A19 // NGA |
| C_C, // 0x0A1A // CA |
| C_C, // 0x0A1B // CHA |
| C_C, // 0x0A1C // JA |
| C_C, // 0x0A1D // JHA |
| C_C, // 0x0A1E // NYA |
| C_C, // 0x0A1F // TTA |
| C_C, // 0x0A20 // TTHA |
| C_C, // 0x0A21 // DDA |
| C_C, // 0x0A22 // DDHA |
| C_C, // 0x0A23 // NNA |
| C_C, // 0x0A24 // TA |
| C_C, // 0x0A25 // THA |
| C_C, // 0x0A26 // DA |
| C_C, // 0x0A27 // DHA |
| C_C, // 0x0A28 // NA |
| C_U, // 0x0A29 // UNASSIGNED |
| C_C, // 0x0A2A // PA |
| C_C, // 0x0A2B // PHA |
| C_C, // 0x0A2C // BA |
| C_C, // 0x0A2D // BHA |
| C_C, // 0x0A2E // MA |
| C_C, // 0x0A2F // YA |
| C_C | C_R, // 0x0A30 // RA |
| C_U, // 0x0A31 // UNASSIGNED |
| C_C, // 0x0A32 // LA |
| C_C, // 0x0A33 // LLA |
| C_U, // 0x0A34 // UNASSIGNED |
| C_C, // 0x0A35 // VA |
| C_C, // 0x0A36 // SHA |
| C_U, // 0x0A37 // UNASSIGNED |
| C_C, // 0x0A38 // SA |
| C_C, // 0x0A39 // HA |
| C_U, // 0x0A3A // UNASSIGNED |
| C_U, // 0x0A3B // UNASSIGNED |
| C_N, // 0x0A3C // NUKTA |
| C_U, // 0x0A3D // UNASSIGNED |
| C_M, // 0x0A3E // AA |
| C_M | C_PRE, // 0x0A3F // I |
| C_M, // 0x0A40 // II |
| C_M, // 0x0A41 // U |
| C_M, // 0x0A42 // UU |
| C_U, // 0x0A43 // UNASSIGNED |
| C_U, // 0x0A44 // UNASSIGNED |
| C_U, // 0x0A45 // UNASSIGNED |
| C_U, // 0x0A46 // UNASSIGNED |
| C_M, // 0x0A47 // EE |
| C_M, // 0x0A48 // AI |
| C_U, // 0x0A49 // UNASSIGNED |
| C_U, // 0x0A4A // UNASSIGNED |
| C_M, // 0x0A4B // OO |
| C_M, // 0x0A4C // AU |
| C_H, // 0x0A4D // VIRAMA (HALANT) |
| C_U, // 0x0A4E // UNASSIGNED |
| C_U, // 0x0A4F // UNASSIGNED |
| C_U, // 0x0A50 // UNASSIGNED |
| C_T, // 0x0A51 // UDATTA |
| C_U, // 0x0A52 // UNASSIGNED |
| C_U, // 0x0A53 // UNASSIGNED |
| C_U, // 0x0A54 // UNASSIGNED |
| C_U, // 0x0A55 // UNASSIGNED |
| C_U, // 0x0A56 // UNASSIGNED |
| C_U, // 0x0A57 // UNASSIGNED |
| C_U, // 0x0A58 // UNASSIGNED |
| C_C | C_N, // 0x0A59 // KHHA |
| C_C | C_N, // 0x0A5A // GHHA |
| C_C | C_N, // 0x0A5B // ZA |
| C_C | C_N, // 0x0A5C // RRA |
| C_U, // 0x0A5D // UNASSIGNED |
| C_C | C_N, // 0x0A5E // FA |
| C_U, // 0x0A5F // UNASSIGNED |
| C_U, // 0x0A60 // UNASSIGNED |
| C_U, // 0x0A61 // UNASSIGNED |
| C_U, // 0x0A62 // UNASSIGNED |
| C_U, // 0x0A63 // UNASSIGNED |
| C_U, // 0x0A64 // UNASSIGNED |
| C_U, // 0x0A65 // UNASSIGNED |
| C_D, // 0x0A66 // ZERO |
| C_D, // 0x0A67 // ONE |
| C_D, // 0x0A68 // TWO |
| C_D, // 0x0A69 // THREE |
| C_D, // 0x0A6A // FOUR |
| C_D, // 0x0A6B // FIVE |
| C_D, // 0x0A6C // SIX |
| C_D, // 0x0A6D // SEVEN |
| C_D, // 0x0A6E // EIGHT |
| C_D, // 0x0A6F // NINE |
| C_O, // 0x0A70 // TIPPI |
| C_O, // 0x0A71 // ADDAK |
| C_V, // 0x0A72 // IRI |
| C_V, // 0x0A73 // URA |
| C_S, // 0x0A74 // EK ONKAR |
| C_O, // 0x0A75 // YAKASH |
| C_U, // 0x0A76 // UNASSIGNED |
| C_U, // 0x0A77 // UNASSIGNED |
| C_U, // 0x0A78 // UNASSIGNED |
| C_U, // 0x0A79 // UNASSIGNED |
| C_U, // 0x0A7A // UNASSIGNED |
| C_U, // 0x0A7B // UNASSIGNED |
| C_U, // 0x0A7C // UNASSIGNED |
| C_U, // 0x0A7D // UNASSIGNED |
| C_U, // 0x0A7E // UNASSIGNED |
| C_U // 0x0A7F // UNASSIGNED |
| }; |
| static int typeOf(int c) { |
| if ((c >= CCA_START) && (c < CCA_END)) { |
| return CCA [ c - CCA_START ] & C_M_TYPE; |
| } else { |
| return C_U; |
| } |
| } |
| static boolean isType(int c, int t) { |
| return typeOf(c) == t; |
| } |
| static boolean hasFlag(int c, int f) { |
| if ((c >= CCA_START) && (c < CCA_END)) { |
| return (CCA [ c - CCA_START ] & f) == f; |
| } else { |
| return false; |
| } |
| } |
| static boolean isC(int c) { |
| return isType(c, C_C); |
| } |
| static boolean isR(int c) { |
| return isType(c, C_C) && hasR(c); |
| } |
| static boolean isV(int c) { |
| return isType(c, C_V); |
| } |
| static boolean isN(int c) { |
| return c == 0x0A3C; |
| } |
| static boolean isH(int c) { |
| return c == 0x0A4D; |
| } |
| static boolean isM(int c) { |
| return isType(c, C_M); |
| } |
| static boolean isPreM(int c) { |
| return isType(c, C_M) && hasFlag(c, C_PRE); |
| } |
| static boolean isX(int c) { |
| switch (typeOf(c)) { |
| case C_M: // matra (combining vowel) |
| case C_A: // accent mark |
| case C_T: // tone mark |
| case C_O: // other (modifying) mark |
| return true; |
| default: |
| return false; |
| } |
| } |
| static boolean hasR(int c) { |
| return hasFlag(c, C_R); |
| } |
| static boolean hasN(int c) { |
| return hasFlag(c, C_N); |
| } |
| |
| @Override |
| public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) { |
| return super.reorderCombiningMarks(gdef, gs, gpa, script, language); |
| } |
| |
| } |