fop-core/src/main/java/org/apache/fop/complexscripts/scripts/TamilScriptProcessor.java - xmlgraphics-fop - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /* $Id$ */

 package org.apache.fop.complexscripts.scripts;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

 import org.apache.fop.complexscripts.util.CharAssociation;
 import org.apache.fop.complexscripts.util.GlyphSequence;

 // CSOFF: LineLengthCheck

 /**
  * <p>The <code>TamilScriptProcessor</code> class implements a script processor for
  * performing glyph substitution and positioning operations on content associated with the Tamil script.</p>
  *
  * <p>Characters are classified through the <code>CCA</code> table, which covers the code points
  * 0x0B80 through 0x0BFF; syllables are segmented by the nested <code>TamilSyllabizer</code>.</p>
  *
  * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
  */
 public class TamilScriptProcessor extends IndicScriptProcessor {

     /** logging instance */
     private static final Log log = LogFactory.getLog(TamilScriptProcessor.class);

     /**
      * Construct a Tamil script processor.
      * @param script the script identifier, handed unchanged to the superclass constructor
      */
     TamilScriptProcessor(String script) {
         super(script);
     }

     /**
      * Obtain the syllabizer class used by this processor.
      * @return the <code>TamilSyllabizer</code> class
      */
     @Override
     protected Class<? extends TamilSyllabizer> getSyllabizerClass() {
         return TamilSyllabizer.class;
     }

     /**
      * Find the rightmost glyph whose associated characters include a pre-base matra.
      * @param gs the glyph sequence to search
      * @return the index of that glyph, or -1 if there is none
      */
     @Override
     protected int findPreBaseMatra(GlyphSequence gs) {
         for (int k = gs.getGlyphCount() - 1; k >= 0; k--) {
             if (containsPreBaseMatra(gs, k)) {
                 return k;
             }
         }
         return -1;
     }

     /**
      * Find the leftmost pre-base matra target, scanning leftwards from the glyph that precedes
      * <code>source</code>. The first consonant glyph met becomes the candidate; the candidate then
      * moves further left across half consonants, and the scan stops at the next consonant that is
      * not a half consonant.
      * @param gs the glyph sequence to search
      * @param source the glyph index to start from (values past the end are clamped to the glyph count)
      * @return the index of the target glyph, or -1 if no consonant glyph precedes <code>source</code>
      */
     @Override
     protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
         int lk = -1;
         for (int k = Math.min(source, gs.getGlyphCount()) - 1; k >= 0; k--) {
             if (containsConsonant(gs, k)) {
                 if (containsHalfConsonant(gs, k) || (lk == -1)) {
                     lk = k;
                 } else {
                     break;
                 }
             }
         }
         return lk;
     }

     /**
      * Determine if any character associated with glyph <code>k</code> is a pre-base matra.
      * @param gs the glyph sequence
      * @param k the glyph index
      * @return true if a pre-base matra is found
      */
     private static boolean containsPreBaseMatra(GlyphSequence gs, int k) {
         CharAssociation a = gs.getAssociation(k);
         int[] ca = gs.getCharacterArray(false);
         for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
             if (isPreM(ca[i])) {
                 return true;
             }
         }
         return false;
     }

     /**
      * Determine if any character associated with glyph <code>k</code> is a consonant.
      * @param gs the glyph sequence
      * @param k the glyph index
      * @return true if a consonant is found
      */
     private static boolean containsConsonant(GlyphSequence gs, int k) {
         CharAssociation a = gs.getAssociation(k);
         int[] ca = gs.getCharacterArray(false);
         for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
             if (isC(ca[i])) {
                 return true;
             }
         }
         return false;
     }

     /**
      * Determine if the association of glyph <code>k</code> carries a true "half" predication.
      * @param gs the glyph sequence
      * @param k the glyph index
      * @return the value of the predication, or false if it is absent
      */
     private static boolean containsHalfConsonant(GlyphSequence gs, int k) {
         Boolean half = (Boolean) gs.getAssociation(k).getPredication("half");
         return (half != null) ? half : false;
     }

     /**
      * Find the first glyph whose association carries a true "rphf" predication.
      * @param gs the glyph sequence to search
      * @return the index of that glyph, or -1 if there is none
      */
     @Override
     protected int findReph(GlyphSequence gs) {
         for (int i = 0, ng = gs.getGlyphCount(); i < ng; i++) {
             if (containsReph(gs, i)) {
                 return i;
             }
         }
         return -1;
     }

     /**
      * Find the glyph index to which a reph at <code>source</code> should be moved.
      * @param gs the glyph sequence to search
      * @param source the index of the reph glyph
      * @return the second candidate if one exists, otherwise the first candidate, otherwise <code>source</code>
      */
     @Override
     protected int findRephTarget(GlyphSequence gs, int source) {
         int   ng = gs.getGlyphCount();
         int   c1 = -1;
         int   c2 = -1;
         // first candidate target is after first non-half consonant
         for (int i = 0; i < ng; i++) {
             if ((i != source) && containsConsonant(gs, i) && !containsHalfConsonant(gs, i)) {
                 c1 = i + 1;
                 break;
             }
         }
         // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark
         for (int i = (c1 >= 0) ? c1 : 0; i < ng; i++) {
             if (containsMatra(gs, i) && !containsPreBaseMatra(gs, i)) {
                 c2 = i + 1;
             } else if (containsOtherMark(gs, i)) {
                 c2 = i;
                 break;
             }
         }
         if (c2 >= 0) {
             return c2;
         } else if (c1 >= 0) {
             return c1;
         } else {
             return source;
         }
     }

     /**
      * Determine if the association of glyph <code>k</code> carries a true "rphf" predication.
      * @param gs the glyph sequence
      * @param k the glyph index
      * @return the value of the predication, or false if it is absent
      */
     private static boolean containsReph(GlyphSequence gs, int k) {
         Boolean rphf = (Boolean) gs.getAssociation(k).getPredication("rphf");
         return (rphf != null) ? rphf : false;
     }

     /**
      * Determine if any character associated with glyph <code>k</code> is a matra.
      * @param gs the glyph sequence
      * @param k the glyph index
      * @return true if a matra is found
      */
     private static boolean containsMatra(GlyphSequence gs, int k) {
         CharAssociation a = gs.getAssociation(k);
         int[] ca = gs.getCharacterArray(false);
         for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
             if (isM(ca[i])) {
                 return true;
             }
         }
         return false;
     }

     /**
      * Determine if any character associated with glyph <code>k</code> is a tone, accent or other mark.
      * @param gs the glyph sequence
      * @param k the glyph index
      * @return true if such a mark is found
      */
     private static boolean containsOtherMark(GlyphSequence gs, int k) {
         CharAssociation a = gs.getAssociation(k);
         int[] ca = gs.getCharacterArray(false);
         for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
             switch (typeOf(ca[i])) {
             case C_T:   // tone (e.g., udatta, anudatta)
             case C_A:   // accent (e.g., acute, grave)
             case C_O:   // other (e.g., candrabindu, anusvara, visarga, etc)
                 return true;
             default:
                 break;
             }
         }
         return false;
     }

     /**
      * Syllabizer for Tamil text. A syllable starts at a consonant and has the form
      * <code>D* L?</code>, where <code>D</code> is a dead consonant and <code>L</code> a live
      * consonant as defined by the private helpers below.
      */
     private static class TamilSyllabizer extends DefaultSyllabizer {

         TamilSyllabizer(String script, String language) {
             super(script, language);
         }

         /**
          * Find the start of the next syllable: | C ...
          * @param ca the character array
          * @param s the index at which to start looking
          * @param e the end index (exclusive)
          * @return -1 if <code>s</code> is not within <code>[0, e)</code>; otherwise the index of the
          * first consonant at or after <code>s</code>, or <code>e</code> if there is none
          */
         @Override
         protected int findStartOfSyllable(int[] ca, int s, int e) {
             if ((s < 0) || (s >= e)) {
                 return -1;
             }
             while ((s < e) && !isC(ca[s])) {
                 s++;
             }
             return s;
         }

         /**
          * Find the end of the syllable starting at <code>s</code>: D* L? | ...
          * @param ca the character array
          * @param s the index of the start of the syllable
          * @param e the end index (exclusive)
          * @return the index just past the syllable, or -1 if nothing could be consumed
          */
         @Override
         protected int findEndOfSyllable(int[] ca, int s, int e) {
             if ((s < 0) || (s >= e)) {
                 return -1;
             }
             int start = s;
             int next;
             // consume dead consonants
             while ((next = isDeadConsonant(ca, s, e)) > s) {
                 s = next;
             }
             // consume zero or one live consonant
             if ((next = isLiveConsonant(ca, s, e)) > s) {
                 s = next;
             }
             // every successful match strictly advances s, so s > start means something was consumed
             return (s > start) ? s : -1;
         }

         /**
          * Match a dead consonant: D := ( C N? H )?
          * @param ca the character array
          * @param s the index at which to match
          * @param e the end index (exclusive)
          * @return the index just past the match, or -1 if there is no consonant at <code>s</code>
          * or no halant terminates it
          */
         private int isDeadConsonant(int[] ca, int s, int e) {
             if ((s < 0) || (s >= e) || !isC(ca[s])) {
                 return -1;
             }
             int i = s + 1;                      // C
             if ((i < e) && isN(ca[i])) {        // N?
                 i++;
             }
             if ((i < e) && isH(ca[i])) {        // H
                 return i + 1;
             }
             return -1;
         }

         /**
          * Match a live consonant: L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK )
          * @param ca the character array
          * @param s the index at which to match
          * @param e the end index (exclusive)
          * @return the index just past the match, or -1 if there is no consonant or vowel at
          * <code>s</code>, or if it has no X and is directly followed by a halant
          */
         private int isLiveConsonant(int[] ca, int s, int e) {
             if ((s < 0) || (s >= e) || !(isC(ca[s]) || isV(ca[s]))) {
                 return -1;
             }
             int i = s + 1;                      // C|V
             if ((i < e) && isN(ca[i])) {        // N?
                 i++;
             }
             int firstX = i;
             while ((i < e) && isX(ca[i])) {     // X*
                 i++;
             }
             // if no X but has H, then ignore C|V
             if ((i == firstX) && (i < e) && isH(ca[i])) {
                 return -1;
             }
             return i;
         }
     }

     // tamil character types
     static final short C_U          = 0;                        // unassigned
     static final short C_C          = 1;                        // consonant
     static final short C_V          = 2;                        // vowel
     static final short C_M          = 3;                        // vowel sign (matra)
     static final short C_S          = 4;                        // symbol or sign
     static final short C_T          = 5;                        // tone mark
     static final short C_A          = 6;                        // accent mark
     static final short C_P          = 7;                        // punctuation
     static final short C_D          = 8;                        // digit
     static final short C_H          = 9;                        // halant (virama)
     static final short C_O          = 10;                       // other signs
     static final short C_N          = 0x0100;                   // nukta(ized)
     static final short C_R          = 0x0200;                   // reph(ized)
     static final short C_PRE        = 0x0400;                   // pre-base
     static final short C_POST       = 0x1000;                   // post-base
     static final short C_WRAP       = C_PRE | C_POST;           // wrap (two part) vowel
     static final short C_M_TYPE     = 0x00FF;                   // type mask
     static final short C_M_FLAGS    = 0x7F00;                   // flag mask
     // tamil block range
     static final int CCA_START       =  0x0B80;                 // first code point mapped by cca
     static final int CCA_END         =  0x0C00;                 // last code point + 1 mapped by cca
     // tamil character type lookups, indexed by (code point - CCA_START)
     static final short[] CCA = {
         C_U,                        // 0x0B80                   //
         C_U,                        // 0x0B81                   //
         C_O,                        // 0x0B82                   // ANUSVARA
         C_O,                        // 0x0B83                   // VISARGA
         C_U,                        // 0x0B84                   //
         C_V,                        // 0x0B85                   // A
         C_V,                        // 0x0B86                   // AA
         C_V,                        // 0x0B87                   // I
         C_V,                        // 0x0B88                   // II
         C_V,                        // 0x0B89                   // U
         C_V,                        // 0x0B8A                   // UU
         C_U,                        // 0x0B8B                   //
         C_U,                        // 0x0B8C                   //
         C_U,                        // 0x0B8D                   //
         C_V,                        // 0x0B8E                   // E
         C_V,                        // 0x0B8F                   // EE
         C_V,                        // 0x0B90                   // AI
         C_U,                        // 0x0B91                   //
         C_V,                        // 0x0B92                   // O
         C_V,                        // 0x0B93                   // OO
         C_V,                        // 0x0B94                   // AU
         C_C,                        // 0x0B95                   // KA
         C_U,                        // 0x0B96                   //
         C_U,                        // 0x0B97                   //
         C_U,                        // 0x0B98                   //
         C_C,                        // 0x0B99                   // NGA
         C_C,                        // 0x0B9A                   // CA
         C_U,                        // 0x0B9B                   //
         C_C,                        // 0x0B9C                   // JA
         C_U,                        // 0x0B9D                   //
         C_C,                        // 0x0B9E                   // NYA
         C_C,                        // 0x0B9F                   // TTA
         C_U,                        // 0x0BA0                   //
         C_U,                        // 0x0BA1                   //
         C_U,                        // 0x0BA2                   //
         C_C,                        // 0x0BA3                   // NNA
         C_C,                        // 0x0BA4                   // TA
         C_U,                        // 0x0BA5                   //
         C_U,                        // 0x0BA6                   //
         C_U,                        // 0x0BA7                   //
         C_C,                        // 0x0BA8                   // NA
         C_C,                        // 0x0BA9                   // NNNA
         C_C,                        // 0x0BAA                   // PA
         C_U,                        // 0x0BAB                   //
         C_U,                        // 0x0BAC                   //
         C_U,                        // 0x0BAD                   //
         C_C,                        // 0x0BAE                   // MA
         C_C,                        // 0x0BAF                   // YA
         C_C | C_R,                  // 0x0BB0                   // RA
         C_C | C_R,                  // 0x0BB1                   // RRA
         C_C,                        // 0x0BB2                   // LA
         C_C,                        // 0x0BB3                   // LLA
         C_C,                        // 0x0BB4                   // LLLA
         C_C,                        // 0x0BB5                   // VA
         C_C,                        // 0x0BB6                   // SHA
         C_C,                        // 0x0BB7                   // SSA
         C_C,                        // 0x0BB8                   // SA
         C_C,                        // 0x0BB9                   // HA
         C_U,                        // 0x0BBA                   //
         C_U,                        // 0x0BBB                   //
         C_U,                        // 0x0BBC                   //
         C_U,                        // 0x0BBD                   //
         C_M,                        // 0x0BBE                   // AA
         C_M,                        // 0x0BBF                   // I
         C_M,                        // 0x0BC0                   // II
         C_M,                        // 0x0BC1                   // U
         C_M,                        // 0x0BC2                   // UU
         C_U,                        // 0x0BC3                   //
         C_U,                        // 0x0BC4                   //
         C_U,                        // 0x0BC5                   //
         C_M | C_PRE,                // 0x0BC6                   // E
         C_M | C_PRE,                // 0x0BC7                   // EE
         C_M | C_PRE,                // 0x0BC8                   // AI
         C_U,                        // 0x0BC9                   //
         C_M | C_WRAP,               // 0x0BCA                   // O
         C_M | C_WRAP,               // 0x0BCB                   // OO
         C_M | C_WRAP,               // 0x0BCC                   // AU
         C_H,                        // 0x0BCD                   // VIRAMA (HALANT)
         C_U,                        // 0x0BCE                   //
         C_U,                        // 0x0BCF                   //
         C_S,                        // 0x0BD0                   // OM
         C_U,                        // 0x0BD1                   //
         C_U,                        // 0x0BD2                   //
         C_U,                        // 0x0BD3                   //
         C_U,                        // 0x0BD4                   //
         C_U,                        // 0x0BD5                   //
         C_U,                        // 0x0BD6                   //
         C_M,                        // 0x0BD7                   // AU LENGTH MARK
         C_U,                        // 0x0BD8                   //
         C_U,                        // 0x0BD9                   //
         C_U,                        // 0x0BDA                   //
         C_U,                        // 0x0BDB                   //
         C_U,                        // 0x0BDC                   //
         C_U,                        // 0x0BDD                   //
         C_U,                        // 0x0BDE                   //
         C_U,                        // 0x0BDF                   //
         C_U,                        // 0x0BE0                   //
         C_U,                        // 0x0BE1                   //
         C_U,                        // 0x0BE2                   //
         C_U,                        // 0x0BE3                   //
         C_U,                        // 0x0BE4                   //
         C_U,                        // 0x0BE5                   //
         C_D,                        // 0x0BE6                   // ZERO
         C_D,                        // 0x0BE7                   // ONE
         C_D,                        // 0x0BE8                   // TWO
         C_D,                        // 0x0BE9                   // THREE
         C_D,                        // 0x0BEA                   // FOUR
         C_D,                        // 0x0BEB                   // FIVE
         C_D,                        // 0x0BEC                   // SIX
         C_D,                        // 0x0BED                   // SEVEN
         C_D,                        // 0x0BEE                   // EIGHT
         C_D,                        // 0x0BEF                   // NINE
         C_S,                        // 0x0BF0                   // TEN
         C_S,                        // 0x0BF1                   // ONE HUNDRED
         C_S,                        // 0x0BF2                   // ONE THOUSAND
         C_S,                        // 0x0BF3                   // DAY SIGN (naal)
         C_S,                        // 0x0BF4                   // MONTH SIGN (maatham)
         C_S,                        // 0x0BF5                   // YEAR SIGN (varudam)
         C_S,                        // 0x0BF6                   // DEBIT SIGN (patru)
         C_S,                        // 0x0BF7                   // CREDIT SIGN (varavu)
         C_S,                        // 0x0BF8                   // AS ABOVE SIGN (merpadi)
         C_S,                        // 0x0BF9                   // RUPEE SIGN (rupai)
         C_S,                        // 0x0BFA                   // NUMBER SIGN (enn)
         C_U,                        // 0x0BFB                   //
         C_U,                        // 0x0BFC                   //
         C_U,                        // 0x0BFD                   //
         C_U,                        // 0x0BFE                   //
         C_U                         // 0x0BFF                   //
     };

     /**
      * Obtain the character type of a code point.
      * @param c the code point
      * @return the type bits of its <code>CCA</code> entry, or <code>C_U</code> if it lies outside the table
      */
     static int typeOf(int c) {
         if ((c >= CCA_START) && (c < CCA_END)) {
             return CCA[c - CCA_START] & C_M_TYPE;
         } else {
             return C_U;
         }
     }

     /**
      * Determine if a code point has the given character type.
      * @param c the code point
      * @param t the type to compare with
      * @return true if <code>typeOf(c)</code> equals <code>t</code>
      */
     static boolean isType(int c, int t) {
         return typeOf(c) == t;
     }

     /**
      * Determine if every bit of <code>f</code> is set in the table entry of a code point.
      * @param c the code point
      * @param f the flag bits to test
      * @return true if all bits are set; false if not, or if <code>c</code> lies outside the table
      */
     static boolean hasFlag(int c, int f) {
         if ((c >= CCA_START) && (c < CCA_END)) {
             return (CCA[c - CCA_START] & f) == f;
         } else {
             return false;
         }
     }

     /** @return true if <code>c</code> is a consonant */
     static boolean isC(int c) {
         return isType(c, C_C);
     }

     /** @return true if <code>c</code> is a consonant carrying the reph flag */
     static boolean isR(int c) {
         return isType(c, C_C) && hasR(c);
     }

     /** @return true if <code>c</code> is an independent vowel */
     static boolean isV(int c) {
         return isType(c, C_V);
     }

     /** @return true if <code>c</code> is the code point 0x093C */
     static boolean isN(int c) {
         // NOTE(review): 0x093C lies outside the 0x0B80-0x0BFF range mapped by CCA, and no CCA
         // entry carries C_N, so this never matches a character of that range - confirm whether
         // this test is intentional
         return c == 0x093C;
     }

     /** @return true if <code>c</code> is a halant (virama), i.e. typed <code>C_H</code> in <code>CCA</code> */
     static boolean isH(int c) {
         // classify through the table (0x0BCD is its only C_H entry) rather than comparing
         // against 0x094D, which lies outside the range mapped by CCA
         return isType(c, C_H);
     }

     /** @return true if <code>c</code> is a vowel sign (matra) */
     static boolean isM(int c) {
         return isType(c, C_M);
     }

     /** @return true if <code>c</code> is a matra carrying the pre-base flag (this includes wrap matras) */
     static boolean isPreM(int c) {
         return isType(c, C_M) && hasFlag(c, C_PRE);
     }

     /** @return true if <code>c</code> is a matra, accent mark, tone mark or other mark */
     static boolean isX(int c) {
         switch (typeOf(c)) {
         case C_M: // matra (combining vowel)
         case C_A: // accent mark
         case C_T: // tone mark
         case C_O: // other (modifying) mark
             return true;
         default:
             return false;
         }
     }

     /** @return true if the table entry of <code>c</code> carries the reph flag */
     static boolean hasR(int c) {
         return hasFlag(c, C_R);
     }

     /** @return true if the table entry of <code>c</code> carries the nukta flag */
     static boolean hasN(int c) {
         return hasFlag(c, C_N);
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/* $Id$ */

	package org.apache.fop.complexscripts.scripts;

	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;

	import org.apache.fop.complexscripts.util.CharAssociation;
	import org.apache.fop.complexscripts.util.GlyphSequence;

	// CSOFF: LineLengthCheck

	/**
	* <p>The <code>TamilScriptProcessor</code> class implements a script processor for
	* performing glyph substitution and positioning operations on content associated with the Tamil script.</p>
	*
	* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
	*/
	public class TamilScriptProcessor extends IndicScriptProcessor {

	/** logging instance */
	private static final Log log = LogFactory.getLog(TamilScriptProcessor.class);

	TamilScriptProcessor(String script) {
	super(script);
	}

	@Override
	protected Class<? extends TamilSyllabizer> getSyllabizerClass() {
	return TamilSyllabizer.class;
	}

	@Override
	// find rightmost pre-base matra
	protected int findPreBaseMatra(GlyphSequence gs) {
	int ng = gs.getGlyphCount();
	int lk = -1;
	for (int i = ng; i > 0; i--) {
	int k = i - 1;
	if (containsPreBaseMatra(gs, k)) {
	lk = k;
	break;
	}
	}
	return lk;
	}

	@Override
	// find leftmost pre-base matra target, starting from source
	protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
	int ng = gs.getGlyphCount();
	int lk = -1;
	for (int i = (source < ng) ? source : ng; i > 0; i--) {
	int k = i - 1;
	if (containsConsonant(gs, k)) {
	if (containsHalfConsonant(gs, k)) {
	lk = k;
	} else if (lk == -1) {
	lk = k;
	} else {
	break;
	}
	}
	}
	return lk;
	}

	private static boolean containsPreBaseMatra(GlyphSequence gs, int k) {
	CharAssociation a = gs.getAssociation(k);
	int[] ca = gs.getCharacterArray(false);
	for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
	if (isPreM(ca [ i ])) {
	return true;
	}
	}
	return false;
	}

	private static boolean containsConsonant(GlyphSequence gs, int k) {
	CharAssociation a = gs.getAssociation(k);
	int[] ca = gs.getCharacterArray(false);
	for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
	if (isC(ca [ i ])) {
	return true;
	}
	}
	return false;
	}

	private static boolean containsHalfConsonant(GlyphSequence gs, int k) {
	Boolean half = (Boolean) gs.getAssociation(k).getPredication("half");
	return (half != null) ? half : false;
	}

	@Override
	protected int findReph(GlyphSequence gs) {
	int ng = gs.getGlyphCount();
	int li = -1;
	for (int i = 0; i < ng; i++) {
	if (containsReph(gs, i)) {
	li = i;
	break;
	}
	}
	return li;
	}

	@Override
	protected int findRephTarget(GlyphSequence gs, int source) {
	int ng = gs.getGlyphCount();
	int c1 = -1;
	int c2 = -1;
	// first candidate target is after first non-half consonant
	for (int i = 0; i < ng; i++) {
	if ((i != source) && containsConsonant(gs, i)) {
	if (!containsHalfConsonant(gs, i)) {
	c1 = i + 1;
	break;
	}
	}
	}
	// second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark
	for (int i = (c1 >= 0) ? c1 : 0; i < ng; i++) {
	if (containsMatra(gs, i) && !containsPreBaseMatra(gs, i)) {
	c2 = i + 1;
	} else if (containsOtherMark(gs, i)) {
	c2 = i;
	break;
	}
	}
	if (c2 >= 0) {
	return c2;
	} else if (c1 >= 0) {
	return c1;
	} else {
	return source;
	}
	}

	private static boolean containsReph(GlyphSequence gs, int k) {
	Boolean rphf = (Boolean) gs.getAssociation(k).getPredication("rphf");
	return (rphf != null) ? rphf : false;
	}

	private static boolean containsMatra(GlyphSequence gs, int k) {
	CharAssociation a = gs.getAssociation(k);
	int[] ca = gs.getCharacterArray(false);
	for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
	if (isM(ca [ i ])) {
	return true;
	}
	}
	return false;
	}

	private static boolean containsOtherMark(GlyphSequence gs, int k) {
	CharAssociation a = gs.getAssociation(k);
	int[] ca = gs.getCharacterArray(false);
	for (int i = a.getStart(), e = a.getEnd(); i < e; i++) {
	switch (typeOf(ca [ i ])) {
	case C_T: // tone (e.g., udatta, anudatta)
	case C_A: // accent (e.g., acute, grave)
	case C_O: // other (e.g., candrabindu, anusvara, visarga, etc)
	return true;
	default:
	break;
	}
	}
	return false;
	}

	private static class TamilSyllabizer extends DefaultSyllabizer {
	TamilSyllabizer(String script, String language) {
	super(script, language);
	}
	@Override
	// \| C ...
	protected int findStartOfSyllable(int[] ca, int s, int e) {
	if ((s < 0) \|\| (s >= e)) {
	return -1;
	} else {
	while (s < e) {
	int c = ca [ s ];
	if (isC(c)) {
	break;
	} else {
	s++;
	}
	}
	return s;
	}
	}
	@Override
	// D* L? \| ...
	protected int findEndOfSyllable(int[] ca, int s, int e) {
	if ((s < 0) \|\| (s >= e)) {
	return -1;
	} else {
	int nd = 0;
	int nl = 0;
	int i;
	// consume dead consonants
	while ((i = isDeadConsonant(ca, s, e)) > s) {
	s = i;
	nd++;
	}
	// consume zero or one live consonant
	if ((i = isLiveConsonant(ca, s, e)) > s) {
	s = i;
	nl++;
	}
	return ((nd > 0) \|\| (nl > 0)) ? s : -1;
	}
	}
	// D := ( C N? H )?
	private int isDeadConsonant(int[] ca, int s, int e) {
	if (s < 0) {
	return -1;
	} else {
	int c;
	int i = 0;
	int nc = 0;
	int nh = 0;
	do {
	// C
	if ((s + i) < e) {
	c = ca [ s + i ];
	if (isC(c)) {
	i++;
	nc++;
	} else {
	break;
	}
	}
	// N?
	if ((s + i) < e) {
	c = ca [ s + 1 ];
	if (isN(c)) {
	i++;
	}
	}
	// H
	if ((s + i) < e) {
	c = ca [ s + i ];
	if (isH(c)) {
	i++;
	nh++;
	} else {
	break;
	}
	}
	} while (false);
	return (nc > 0) && (nh > 0) ? s + i : -1;
	}
	}
	// L := ( (C\|V) N? X* )?; where X = ( MATRA \| ACCENT MARK \| TONE MARK \| OTHER MARK )
	private int isLiveConsonant(int[] ca, int s, int e) {
	if (s < 0) {
	return -1;
	} else {
	int c;
	int i = 0;
	int nc = 0;
	int nv = 0;
	int nx = 0;
	do {
	// C
	if ((s + i) < e) {
	c = ca [ s + i ];
	if (isC(c)) {
	i++;
	nc++;
	} else if (isV(c)) {
	i++;
	nv++;
	} else {
	break;
	}
	}
	// N?
	if ((s + i) < e) {
	c = ca [ s + i ];
	if (isN(c)) {
	i++;
	}
	}
	// X*
	while ((s + i) < e) {
	c = ca [ s + i ];
	if (isX(c)) {
	i++;
	nx++;
	} else {
	break;
	}
	}
	} while (false);
	// if no X but has H, then ignore C\|I
	if (nx == 0) {
	if ((s + i) < e) {
	c = ca [ s + i ];
	if (isH(c)) {
	if (nc > 0) {
	nc--;
	} else if (nv > 0) {
	nv--;
	}
	}
	}
	}
	return ((nc > 0) \|\| (nv > 0)) ? s + i : -1;
	}
	}
	}

	// tamil character types
	static final short C_U = 0; // unassigned
	static final short C_C = 1; // consonant
	static final short C_V = 2; // vowel
	static final short C_M = 3; // vowel sign (matra)
	static final short C_S = 4; // symbol or sign
	static final short C_T = 5; // tone mark
	static final short C_A = 6; // accent mark
	static final short C_P = 7; // punctuation
	static final short C_D = 8; // digit
	static final short C_H = 9; // halant (virama)
	static final short C_O = 10; // other signs
	static final short C_N = 0x0100; // nukta(ized)
	static final short C_R = 0x0200; // reph(ized)
	static final short C_PRE = 0x0400; // pre-base
	static final short C_POST = 0x1000; // post-base
	static final short C_WRAP = C_PRE \| C_POST; // wrap (two part) vowel
	static final short C_M_TYPE = 0x00FF; // type mask
	static final short C_M_FLAGS = 0x7F00; // flag mask
	// tamil block range
	static final int CCA_START = 0x0B80; // first code point mapped by cca
	static final int CCA_END = 0x0C00; // last code point + 1 mapped by cca
	// tamil character type lookups
	static final short[] CCA = {
	C_U, // 0x0B80 //
	C_U, // 0x0B81 //
	C_O, // 0x0B82 // ANUSVARA
	C_O, // 0x0B83 // VISARGA
	C_U, // 0x0B84 //
	C_V, // 0x0B85 // A
	C_V, // 0x0B86 // AA
	C_V, // 0x0B87 // I
	C_V, // 0x0B88 // II
	C_V, // 0x0B89 // U
	C_V, // 0x0B8A // UU
	C_U, // 0x0B8B //
	C_U, // 0x0B8C //
	C_U, // 0x0B8D //
	C_V, // 0x0B8E // E
	C_V, // 0x0B8F // EE
	C_V, // 0x0B90 // AI
	C_U, // 0x0B91 //
	C_V, // 0x0B92 // O
	C_V, // 0x0B93 // OO
	C_V, // 0x0B94 // AU
	C_C, // 0x0B95 // KA
	C_U, // 0x0B96 //
	C_U, // 0x0B97 //
	C_U, // 0x0B98 //
	C_C, // 0x0B99 // NGA
	C_C, // 0x0B9A // CA
	C_U, // 0x0B9B //
	C_C, // 0x0B9C // JA
	C_U, // 0x0B9D //
	C_C, // 0x0B9E // NYA
	C_C, // 0x0B9F // TTA
	C_U, // 0x0BA0 //
	C_U, // 0x0BA1 //
	C_U, // 0x0BA2 //
	C_C, // 0x0BA3 // NNA
	C_C, // 0x0BA4 // TA
	C_U, // 0x0BA5 //
	C_U, // 0x0BA6 //
	C_U, // 0x0BA7 //
	C_C, // 0x0BA8 // NA
	C_C, // 0x0BA9 // NNNA
	C_C, // 0x0BAA // PA
	C_U, // 0x0BAB //
	C_U, // 0x0BAC //
	C_U, // 0x0BAD //
	C_C, // 0x0BAE // MA
	C_C, // 0x0BAF // YA
	C_C \| C_R, // 0x0BB0 // RA
	C_C \| C_R, // 0x0BB1 // RRA
	C_C, // 0x0BB2 // LA
	C_C, // 0x0BB3 // LLA
	C_C, // 0x0BB4 // LLLA
	C_C, // 0x0BB5 // VA
	C_C, // 0x0BB6 // SHA
	C_C, // 0x0BB7 // SSA
	C_C, // 0x0BB8 // SA
	C_C, // 0x0BB9 // HA
	C_U, // 0x0BBA //
	C_U, // 0x0BBB //
	C_U, // 0x0BBC //
	C_U, // 0x0BBD //
	C_M, // 0x0BBE // AA
	C_M, // 0x0BBF // I
	C_M, // 0x0BC0 // II
	C_M, // 0x0BC1 // U
	C_M, // 0x0BC2 // UU
	C_U, // 0x0BC3 //
	C_U, // 0x0BC4 //
	C_U, // 0x0BC5 //
	C_M \| C_PRE, // 0x0BC6 // E
	C_M \| C_PRE, // 0x0BC7 // EE
	C_M \| C_PRE, // 0x0BC8 // AI
	C_U, // 0x0BC9 //
	C_M \| C_WRAP, // 0x0BCA // O
	C_M \| C_WRAP, // 0x0BCB // OO
	C_M \| C_WRAP, // 0x0BCC // AU
	C_H, // 0x0BCD // VIRAMA (HALANT)
	C_U, // 0x0BCE //
	C_U, // 0x0BCF //
	C_S, // 0x0BD0 // OM
	C_U, // 0x0BD1 //
	C_U, // 0x0BD2 //
	C_U, // 0x0BD3 //
	C_U, // 0x0BD4 //
	C_U, // 0x0BD5 //
	C_U, // 0x0BD6 //
	C_M, // 0x0BD7 // AU LENGTH MARK
	C_U, // 0x0BD8 //
	C_U, // 0x0BD9 //
	C_U, // 0x0BDA //
	C_U, // 0x0BDB //
	C_U, // 0x0BDC //
	C_U, // 0x0BDD //
	C_U, // 0x0BDE //
	C_U, // 0x0BDF //
	C_U, // 0x0BE0 //
	C_U, // 0x0BE1 //
	C_U, // 0x0BE2 //
	C_U, // 0x0BE3 //
	C_U, // 0x0BE4 //
	C_U, // 0x0BE5 //
	C_D, // 0x0BE6 // ZERO
	C_D, // 0x0BE7 // ONE
	C_D, // 0x0BE8 // TWO
	C_D, // 0x0BE9 // THREE
	C_D, // 0x0BEA // FOUR
	C_D, // 0x0BEB // FIVE
	C_D, // 0x0BEC // SIX
	C_D, // 0x0BED // SEVEN
	C_D, // 0x0BEE // EIGHT
	C_D, // 0x0BEF // NINE
	C_S, // 0x0BF0 // TEN
	C_S, // 0x0BF1 // ONE HUNDRED
	C_S, // 0x0BF2 // ONE THOUSAND
	C_S, // 0x0BF3 // DAY SIGN (naal)
	C_S, // 0x0BF4 // MONTH SIGN (maatham)
	C_S, // 0x0BF5 // YEAR SIGN (varudam)
	C_S, // 0x0BF6 // DEBIT SIGN (patru)
	C_S, // 0x0BF7 // CREDIT SIGN (varavu)
	C_S, // 0x0BF8 // AS ABOVE SIGN (merpadi)
	C_S, // 0x0BF9 // RUPEE SIGN (rupai)
	C_S, // 0x0BFA // NUMBER SIGN (enn)
	C_U, // 0x0BFB //
	C_U, // 0x0BFC //
	C_U, // 0x0BFD //
	C_U, // 0x0BFE //
	C_U // 0x0BFF //
	};
	static int typeOf(int c) {
	if ((c >= CCA_START) && (c < CCA_END)) {
	return CCA [ c - CCA_START ] & C_M_TYPE;
	} else {
	return C_U;
	}
	}
	static boolean isType(int c, int t) {
	return typeOf(c) == t;
	}
	static boolean hasFlag(int c, int f) {
	if ((c >= CCA_START) && (c < CCA_END)) {
	return (CCA [ c - CCA_START ] & f) == f;
	} else {
	return false;
	}
	}
	static boolean isC(int c) {
	return isType(c, C_C);
	}
	static boolean isR(int c) {
	return isType(c, C_C) && hasR(c);
	}
	static boolean isV(int c) {
	return isType(c, C_V);
	}
	static boolean isN(int c) {
	return c == 0x093C;
	}
	static boolean isH(int c) {
	return c == 0x094D;
	}
	static boolean isM(int c) {
	return isType(c, C_M);
	}
	static boolean isPreM(int c) {
	return isType(c, C_M) && hasFlag(c, C_PRE);
	}
	static boolean isX(int c) {
	switch (typeOf(c)) {
	case C_M: // matra (combining vowel)
	case C_A: // accent mark
	case C_T: // tone mark
	case C_O: // other (modifying) mark
	return true;
	default:
	return false;
	}
	}
	static boolean hasR(int c) {
	return hasFlag(c, C_R);
	}
	static boolean hasN(int c) {
	return hasFlag(c, C_N);
	}

	}