blob: 26249bc22d574613bee14aeca5df8195ea451cf2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.scripts;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fop.complexscripts.fonts.GlyphTable;
import org.apache.fop.complexscripts.util.CharScript;
import org.apache.fop.complexscripts.util.GlyphContextTester;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;
// CSOFF: AvoidNestedBlocksCheck
// CSOFF: NoWhitespaceAfterCheck
// CSOFF: InnerAssignmentCheck
// CSOFF: SimplifyBooleanReturnCheck
// CSOFF: EmptyForIteratorPadCheck
// CSOFF: WhitespaceAfterCheck
// CSOFF: ParameterNumberCheck
// CSOFF: LineLengthCheck
/**
* <p>The <code>IndicScriptProcessor</code> class implements a script processor for
* performing glyph substitution and positioning operations on content associated with the Indic script.</p>
*
* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
*/
public class IndicScriptProcessor extends DefaultScriptProcessor {
/** logging instance */
private static final Log log = LogFactory.getLog(IndicScriptProcessor.class); // CSOK: ConstantNameCheck
/**
* Required features to use for substitutions. This array is returned as-is (not copied)
* by {@link #getSubstitutionFeatures()}.
*/
private static final String[] gsubReqFeatures = // CSOK: ConstantNameCheck
{
"abvf", // above base forms
"abvs", // above base substitutions
"akhn", // akhand
"blwf", // below base forms
"blws", // below base substitutions
"ccmp", // glyph composition/decomposition
"cjct", // conjunct forms
"clig", // contextual ligatures
"half", // half forms
"haln", // halant forms
"locl", // localized forms
"nukt", // nukta forms
"pref", // pre-base forms
"pres", // pre-base substitutions
"pstf", // post-base forms
"psts", // post-base substitutions
"rkrf", // rakar forms
"rphf", // reph form
"vatu" // vattu variants
};
/**
* Optional features to use for substitutions. This array is returned as-is (not copied)
* by {@link #getOptionalSubstitutionFeatures()}.
*/
private static final String[] gsubOptFeatures = // CSOK: ConstantNameCheck
{
"afrc", // alternative fractions
"calt", // contextual alternatives
"dlig" // discretionary ligatures
};
/**
* Required features to use for positioning. This array is returned as-is (not copied)
* by {@link #getPositioningFeatures()}.
*/
private static final String[] gposReqFeatures = // CSOK: ConstantNameCheck
{
"abvm", // above base marks
"blwm", // below base marks
"dist", // distance (adjustment)
"kern" // kerning
};
/**
* Optional features to use for positioning (currently none). This array is returned
* as-is (not copied) by {@link #getOptionalPositioningFeatures()}.
*/
private static final String[] gposOptFeatures = // CSOK: ConstantNameCheck
{
};
/**
 * Script context tester used for substitution. Nothing in this file adds entries to the
 * tester map, so {@link #getTester} currently yields null for every feature.
 */
private static class SubstitutionScriptContextTester implements ScriptContextTester {
    /** Glyph context testers keyed by feature tag; shared by all instances. */
    private static final Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>(); // CSOK: ConstantNameCheck
    /**
     * Look up the glyph context tester registered for a feature.
     * @param feature a feature tag
     * @return the registered tester, or null if none is registered for FEATURE
     */
    public GlyphContextTester getTester(String feature) {
        return testerMap.get(feature);
    }
}
/**
 * Script context tester used for positioning. Nothing in this file adds entries to the
 * tester map, so {@link #getTester} currently yields null for every feature.
 */
private static class PositioningScriptContextTester implements ScriptContextTester {
    /** Glyph context testers keyed by feature tag; shared by all instances. */
    private static final Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>(); // CSOK: ConstantNameCheck
    /**
     * Look up the glyph context tester registered for a feature.
     * @param feature a feature tag
     * @return the registered tester, or null if none is registered for FEATURE
     */
    public GlyphContextTester getTester(String feature) {
        return testerMap.get(feature);
    }
}
/**
 * Create the flavor of Indic script processor that matches a script tag. Devanagari,
 * Gujarati and Gurmukhi (including their "_2" script codes) get a dedicated processor;
 * every other script code gets a plain <code>IndicScriptProcessor</code>.
 * @param script tag
 * @return script processor instance
 */
public static ScriptProcessor makeProcessor(String script) {
    final int scriptCode = CharScript.scriptCodeFromTag(script);
    final ScriptProcessor processor;
    switch (scriptCode) {
    case CharScript.SCRIPT_DEVANAGARI:
    case CharScript.SCRIPT_DEVANAGARI_2:
        processor = new DevanagariScriptProcessor(script);
        break;
    case CharScript.SCRIPT_GUJARATI:
    case CharScript.SCRIPT_GUJARATI_2:
        processor = new GujaratiScriptProcessor(script);
        break;
    case CharScript.SCRIPT_GURMUKHI:
    case CharScript.SCRIPT_GURMUKHI_2:
        processor = new GurmukhiScriptProcessor(script);
        break;
    // [TBD] implement other script processors
    default:
        processor = new IndicScriptProcessor(script);
        break;
    }
    return processor;
}
/** Context tester returned by {@link #getSubstitutionContextTester()}; one per processor. */
private final ScriptContextTester subContextTester = new SubstitutionScriptContextTester();
/** Context tester returned by {@link #getPositioningContextTester()}; one per processor. */
private final ScriptContextTester posContextTester = new PositioningScriptContextTester();
/**
 * Instantiate an Indic script processor for a script tag.
 * @param script tag
 */
IndicScriptProcessor(String script) {
    super(script);
}
/**
* {@inheritDoc}
* <p>Returns the shared <code>gsubReqFeatures</code> array itself; no copy is made.</p>
*/
public String[] getSubstitutionFeatures() {
return gsubReqFeatures;
}
/**
* {@inheritDoc}
* <p>Returns the shared <code>gsubOptFeatures</code> array itself; no copy is made.</p>
*/
public String[] getOptionalSubstitutionFeatures() {
return gsubOptFeatures;
}
/**
* {@inheritDoc}
* <p>Returns the tester instance created when this processor was constructed.</p>
*/
public ScriptContextTester getSubstitutionContextTester() {
return subContextTester;
}
/**
* {@inheritDoc}
* <p>Returns the shared <code>gposReqFeatures</code> array itself; no copy is made.</p>
*/
public String[] getPositioningFeatures() {
return gposReqFeatures;
}
/**
* {@inheritDoc}
* <p>Returns the shared <code>gposOptFeatures</code> array itself, which is empty.</p>
*/
public String[] getOptionalPositioningFeatures() {
return gposOptFeatures;
}
/**
* {@inheritDoc}
* <p>Returns the tester instance created when this processor was constructed.</p>
*/
public ScriptContextTester getPositioningContextTester() {
return posContextTester;
}
/**
 * {@inheritDoc}
 * <p>The input is split into syllables and each syllable is processed on its own:
 * basic shaping uses are applied first, then the pre-base matra and reph are reordered,
 * then presentation uses are applied. The processed syllables are finally joined back
 * together.</p>
 */
@Override
public GlyphSequence substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct) {
    assert usa != null;
    // 1. syllabize
    GlyphSequence[] syllables = syllabize(gs, script, language);
    // 2. process each syllable in place
    for (int k = 0; k < syllables.length; k++) {
        GlyphSequence syllable = syllables[k];
        // basic shaping substitutions, in the order given by USA
        for (GlyphTable.UseSpec use : usa) {
            if (!isBasicShapingUse(use)) {
                continue;
            }
            syllable.setPredications(true);
            syllable = use.substitute(syllable, script, language, sct);
        }
        // reordering: pre-base matra first, then reph
        syllable = reorderPreBaseMatra(syllable);
        syllable = reorderReph(syllable);
        // presentation substitutions, in the order given by USA
        for (GlyphTable.UseSpec use : usa) {
            if (!isPresentationUse(use)) {
                continue;
            }
            syllable.setPredications(true);
            syllable = use.substitute(syllable, script, language, sct);
        }
        syllables[k] = syllable;
    }
    // 3. return reassembled substituted syllables
    return unsyllabize(gs, syllables);
}
/**
* Get script specific syllabizer class. The value is passed unchanged to
* <code>Syllabizer.getSyllabizer</code> when a glyph sequence is syllabized.
* This base implementation always returns null.
* @return a syllabizer class object or null
*/
protected Class<? extends Syllabizer> getSyllabizerClass() {
return null;
}
/**
 * Split a glyph sequence into syllable sequences using the syllabizer obtained for the
 * given script, language and {@link #getSyllabizerClass()}.
 * @param gs input glyph sequence
 * @param script tag
 * @param language tag
 * @return the sequences produced by the syllabizer
 */
private GlyphSequence[] syllabize(GlyphSequence gs, String script, String language) {
    Class<? extends Syllabizer> syllabizerClass = getSyllabizerClass();
    Syllabizer syllabizer = Syllabizer.getSyllabizer(script, language, syllabizerClass);
    return syllabizer.syllabize(gs);
}
/**
* Reassemble processed syllable sequences into a single glyph sequence by delegating to
* <code>GlyphSequence.join</code>.
* @param gs original (unsegmented) glyph sequence
* @param sa processed syllable sequences
* @return the joined glyph sequence
*/
private GlyphSequence unsyllabize ( GlyphSequence gs, GlyphSequence[] sa ) {
return GlyphSequence.join ( gs, sa );
}
/** Set view of {@link #basicShapingFeatureStrings}, built once at class initialization. */
private static final Set<String> basicShapingFeatures; // CSOK: ConstantNameCheck
/** Feature tags whose uses are applied in the basic shaping pass of substitution. */
private static final String[] basicShapingFeatureStrings = { // CSOK: ConstantNameCheck
    "abvf",
    "akhn",
    "blwf",
    "cjct",
    "half",
    "locl",
    "nukt",
    "pref",
    "pstf",
    "rkrf",
    "rphf",
    "vatu",
};
static {
    Set<String> tags = new HashSet<String>();
    for (int k = 0; k < basicShapingFeatureStrings.length; k++) {
        tags.add(basicShapingFeatureStrings[k]);
    }
    basicShapingFeatures = tags;
}
/**
 * Determine whether a use specification belongs to the basic shaping pass.
 * @param us use specification (must not be null)
 * @return true if the feature of US is one of the basic shaping features
 */
private boolean isBasicShapingUse(GlyphTable.UseSpec us) {
    assert us != null;
    // the set is assigned exactly once in the static initializer, so it is never null here
    return basicShapingFeatures.contains(us.getFeature());
}
/** Set view of {@link #presentationFeatureStrings}, built once at class initialization. */
private static final Set<String> presentationFeatures; // CSOK: ConstantNameCheck
/** Feature tags whose uses are applied in the presentation pass of substitution. */
private static final String[] presentationFeatureStrings = { // CSOK: ConstantNameCheck
    "abvs",
    "blws",
    "calt",
    "haln",
    "pres",
    "psts",
};
static {
    Set<String> tags = new HashSet<String>();
    for (int k = 0; k < presentationFeatureStrings.length; k++) {
        tags.add(presentationFeatureStrings[k]);
    }
    presentationFeatures = tags;
}
/**
 * Determine whether a use specification belongs to the presentation pass.
 * @param us use specification (must not be null)
 * @return true if the feature of US is one of the presentation features
 */
private boolean isPresentationUse(GlyphTable.UseSpec us) {
    assert us != null;
    // the set is assigned exactly once in the static initializer, so it is never null here
    return presentationFeatures.contains(us.getFeature());
}
/**
 * Move the pre-base matra, if one is found, to its target position.
 * @param gs input sequence
 * @return the reordered sequence, or GS itself when no matra or no target is found or
 * when the matra is already at its target
 */
private GlyphSequence reorderPreBaseMatra(GlyphSequence gs) {
    final int matraIndex = findPreBaseMatra(gs);
    if (matraIndex < 0) {
        return gs;
    }
    final int targetIndex = findPreBaseMatraTarget(gs, matraIndex);
    if (targetIndex < 0 || targetIndex == matraIndex) {
        return gs;
    }
    return reorder(gs, matraIndex, targetIndex);
}
/**
* Find pre-base matra in sequence. This base implementation never finds one and always
* returns -1, so no pre-base matra reordering takes place unless a subclass overrides
* this method.
* @param gs input sequence
* @return index of pre-base matra or -1 if not found
*/
protected int findPreBaseMatra ( GlyphSequence gs ) {
return -1;
}
/**
* Find pre-base matra target in sequence. This base implementation always returns -1,
* which the caller treats as "no target" (no reordering).
* @param gs input sequence
* @param source index of pre-base matra
* @return index of pre-base matra target or -1
*/
protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) {
return -1;
}
/**
 * Move the reph, if one is found, to its target position.
 * @param gs input sequence
 * @return the reordered sequence, or GS itself when no reph or no target is found or
 * when the reph is already at its target
 */
private GlyphSequence reorderReph(GlyphSequence gs) {
    final int rephIndex = findReph(gs);
    if (rephIndex < 0) {
        return gs;
    }
    final int targetIndex = findRephTarget(gs, rephIndex);
    if (targetIndex < 0 || targetIndex == rephIndex) {
        return gs;
    }
    return reorder(gs, rephIndex, targetIndex);
}
/**
* Find reph in sequence. This base implementation never finds one and always returns -1,
* so no reph reordering takes place unless a subclass overrides this method.
* @param gs input sequence
* @return index of reph or -1 if not found
*/
protected int findReph ( GlyphSequence gs ) {
return -1;
}
/**
* Find reph target in sequence. This base implementation always returns -1, which the
* caller treats as "no target" (no reordering).
* @param gs input sequence
* @param source index of reph
* @return index of reph target or -1
*/
protected int findRephTarget ( GlyphSequence gs, int source ) {
return -1;
}
/**
* Reorder a glyph sequence by delegating to <code>GlyphSequence.reorder</code>.
* @param gs input sequence
* @param source index found by one of the find methods (matra or reph)
* @param target index returned by the corresponding find-target method
* @return the sequence returned by <code>GlyphSequence.reorder</code>
*/
private GlyphSequence reorder ( GlyphSequence gs, int source, int target ) {
// NOTE(review): the literal 1 is presumably the number of glyphs to move - confirm against GlyphSequence.reorder
return GlyphSequence.reorder ( gs, source, 1, target );
}
/**
 * {@inheritDoc}
 * <p>No Indic specific positioning is performed here; the call is forwarded unchanged to
 * the superclass implementation.</p>
 */
@Override
public boolean position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct) {
    return super.position(gs, script, language, fontSize, usa, widths, adjustments, sct);
}
/**
 * Abstract syllabizer. A syllabizer is identified by its script and language; instances
 * are created on demand by {@link #getSyllabizer} and cached in a static map keyed by
 * {@link #makeSyllabizerId}. The cache is a plain <code>HashMap</code> and is not
 * synchronized.
 */
protected abstract static class Syllabizer implements Comparable {
    /** Cache of syllabizers keyed by "script:language". */
    private static final Map<String, Syllabizer> syllabizers = new HashMap<String, Syllabizer>(); // CSOK: ConstantNameCheck
    private final String script;
    private final String language;
    /**
     * Instantiate a syllabizer.
     * @param script tag
     * @param language tag
     */
    Syllabizer(String script, String language) {
        this.script = script;
        this.language = language;
    }
    /**
     * Subdivide glyph sequence GS into syllabic segments each represented by a distinct
     * output glyph sequence.
     * @param gs input glyph sequence
     * @return segmented syllabic glyph sequences
     */
    abstract GlyphSequence[] syllabize(GlyphSequence gs);
    /** {@inheritDoc} */
    @Override
    public int hashCode() {
        int hc = 0;
        hc = 7 * hc + (hc ^ script.hashCode());
        hc = 11 * hc + (hc ^ language.hashCode());
        return hc;
    }
    /**
     * {@inheritDoc}
     * <p>Two syllabizers are equal when their script and language are equal; the concrete
     * class is not compared.</p>
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Syllabizer)) {
            return false;
        }
        Syllabizer other = (Syllabizer) o;
        return other.script.equals(script) && other.language.equals(language);
    }
    /**
     * {@inheritDoc}
     * <p>Orders by script, then by language. An argument that is not a syllabizer
     * compares as greater than this object (result is -1).</p>
     */
    public int compareTo(Object o) {
        if (!(o instanceof Syllabizer)) {
            return -1;
        }
        Syllabizer other = (Syllabizer) o;
        int d = script.compareTo(other.script);
        if (d == 0) {
            d = language.compareTo(other.language);
        }
        return d;
    }
    /**
     * Obtain the cached syllabizer for a script and language, creating and caching it on
     * first use. If SYLLABIZERCLASS is null or cannot be instantiated, a
     * {@link DefaultSyllabizer} is used instead.
     * @param script tag
     * @param language tag
     * @param syllabizerClass script specific syllabizer class, or null for the default
     * @return a syllabizer, never null
     */
    static Syllabizer getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
        String sid = makeSyllabizerId(script, language);
        Syllabizer s = syllabizers.get(sid);
        if (s == null) {
            s = makeSyllabizer(script, language, syllabizerClass);
            if (s == null) {
                s = new DefaultSyllabizer(script, language);
            }
            syllabizers.put(sid, s);
        }
        return s;
    }
    /**
     * Construct the cache key for a script and language.
     * @param script tag
     * @param language tag
     * @return SCRIPT and LANGUAGE joined by a colon
     */
    static String makeSyllabizerId(String script, String language) {
        return script + ":" + language;
    }
    /**
     * Reflectively instantiate a syllabizer through its (String, String) constructor.
     * @param script tag
     * @param language tag
     * @param syllabizerClass class to instantiate, may be null
     * @return a new syllabizer, or null if SYLLABIZERCLASS is null, has no such
     * constructor, or cannot be instantiated
     */
    static Syllabizer makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
        // The base processor supplies no script specific class; report "none" so that the
        // caller falls back to the default syllabizer instead of failing with an NPE.
        if (syllabizerClass == null) {
            return null;
        }
        Syllabizer s;
        try {
            Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor(String.class, String.class);
            s = cf.newInstance(script, language);
        } catch (NoSuchMethodException e) {
            // deliberate best effort: caller substitutes the default syllabizer
            s = null;
        } catch (InstantiationException e) {
            s = null;
        } catch (IllegalAccessException e) {
            s = null;
        } catch (InvocationTargetException e) {
            s = null;
        }
        return s;
    }
}
/**
 * Default syllabizer. With the default {@link #findStartOfSyllable} and
 * {@link #findEndOfSyllable} hooks no syllable is ever recognized, so the whole input is
 * treated as one non-syllable segment; subclasses override the hooks to recognize
 * script specific syllables.
 */
protected static class DefaultSyllabizer extends Syllabizer {
    /**
     * Instantiate a default syllabizer.
     * @param script tag
     * @param language tag
     */
    DefaultSyllabizer(String script, String language) {
        super(script, language);
    }
    /** {@inheritDoc} */
    @Override
    GlyphSequence[] syllabize(GlyphSequence gs) {
        int[] ca = gs.getCharacterArray(false);
        int nc = gs.getCharacterCount();
        if (nc == 0) {
            // nothing to segment: hand back the input as the only sequence
            return new GlyphSequence[] { gs };
        } else {
            return segmentize(gs, segmentize(ca, nc));
        }
    }
    /**
     * Construct array of segments from original character array (associated with original glyph sequence).
     * Segments are produced in order and never have zero length.
     * @param ca input character sequence
     * @param nc number of characters in sequence
     * @return array of syllable segments
     */
    protected Segment[] segmentize(int[] ca, int nc) {
        Vector<Segment> sv = new Vector<Segment>(nc);
        for (int s = 0, e = nc; s < e; ) {
            int i = findStartOfSyllable(ca, s, e);
            if (i > s) {
                // from s to i is non-syllable segment
                sv.add(new Segment(s, i, Segment.OTHER));
                s = i; // move s to start of syllable
                if (s >= e) {
                    // no syllable start was found before the end of input; stop here
                    // rather than appending an empty trailing segment
                    break;
                }
            }
            i = findEndOfSyllable(ca, s, e);
            if (i > s) {
                // from s to i is syllable segment
                sv.add(new Segment(s, i, Segment.SYLLABLE));
                s = i; // move s to end of syllable
            } else {
                // no end found: from s to e is non-syllable segment
                sv.add(new Segment(s, e, Segment.OTHER));
                s = e; // move s to end of input sequence
            }
        }
        return sv.toArray(new Segment [ sv.size() ]);
    }
    /**
     * Construct array of glyph sequences from original glyph sequence and segment array.
     * A glyph is assigned to a segment when its character association is contained in the
     * segment's character range; segments that receive no glyphs produce no sequence.
     * @param gs original input glyph sequence
     * @param sa segment array
     * @return array of glyph sequences each belonging to an (ordered) segment in SA, or
     * an array holding only GS if no segment received any glyph
     */
    protected GlyphSequence[] segmentize(GlyphSequence gs, Segment[] sa) {
        int ng = gs.getGlyphCount();
        int[] ga = gs.getGlyphArray(false);
        GlyphSequence.CharAssociation[] aa = gs.getAssociations(0, -1);
        Vector<GlyphSequence> nsv = new Vector<GlyphSequence>();
        for (int i = 0, ns = sa.length; i < ns; i++) {
            Segment s = sa [ i ];
            int offset = s.getOffset();
            int count = s.getCount();
            Vector<Integer> ngv = new Vector<Integer>(ng);
            Vector<GlyphSequence.CharAssociation> nav = new Vector<GlyphSequence.CharAssociation>(ng);
            for (int j = 0; j < ng; j++) {
                GlyphSequence.CharAssociation ca = aa [ j ];
                if (ca.contained(offset, count)) {
                    ngv.add(ga [ j ]);
                    nav.add(ca);
                }
            }
            if (ngv.size() > 0) {
                nsv.add(new GlyphSequence(gs, null, toIntArray(ngv), null, null, nav.toArray(new GlyphSequence.CharAssociation [ nav.size() ]), null));
            }
        }
        if (nsv.size() > 0) {
            return nsv.toArray(new GlyphSequence [ nsv.size() ]);
        } else {
            return new GlyphSequence[] { gs };
        }
    }
    /**
     * Find start of syllable in character array, starting at S, ending at E.
     * This default implementation never finds a start.
     * @param ca character array
     * @param s start index
     * @param e end index
     * @return index of start or E if no start found
     */
    protected int findStartOfSyllable(int[] ca, int s, int e) {
        return e;
    }
    /**
     * Find end of syllable in character array, starting at S, ending at E.
     * This default implementation never finds an end.
     * @param ca character array
     * @param s start index
     * @param e end index
     * @return index of end or S if no end found
     */
    protected int findEndOfSyllable(int[] ca, int s, int e) {
        return s;
    }
    /**
     * Copy a vector of boxed integers into a primitive array.
     * @param iv vector of integers
     * @return array holding the same values in the same order
     */
    private static int[] toIntArray(Vector<Integer> iv) {
        int n = iv.size();
        int[] ia = new int [ n ];
        for (int i = 0; i < n; i++) {
            ia [ i ] = iv.get(i).intValue();
        }
        return ia;
    }
}
/**
 * Syllabic segment: a range of indices from <code>start</code> (inclusive) to
 * <code>end</code> (exclusive) together with a segment type.
 */
protected static class Segment {
    /** Segment type: other (non-syllable) characters. */
    static final int OTHER = 0;
    /** Segment type: (orthographic) syllable. */
    static final int SYLLABLE = 1;
    private final int start;
    private final int end;
    private final int type;
    /**
     * Instantiate a segment.
     * @param start index of first element of segment
     * @param end index one past last element of segment
     * @param type one of {@link #OTHER} or {@link #SYLLABLE}
     */
    Segment(int start, int end, int type) {
        this.type = type;
        this.end = end;
        this.start = start;
    }
    /** @return start index of segment */
    int getStart() {
        return this.start;
    }
    /** @return end index of segment */
    int getEnd() {
        return this.end;
    }
    /** @return offset of segment, which is the same value as its start index */
    int getOffset() {
        return getStart();
    }
    /** @return length of segment, i.e., end minus start */
    int getCount() {
        return getEnd() - getStart();
    }
    /** @return segment type */
    int getType() {
        return this.type;
    }
}
}