blob: fcabad3964cdc646712a2e36d158ff995e3d8ce9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.scripts;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fop.complexscripts.fonts.GlyphTable;
import org.apache.fop.complexscripts.util.CharScript;
import org.apache.fop.complexscripts.util.GlyphContextTester;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;
// CSOFF: LineLengthCheck
/**
* <p>The <code>IndicScriptProcessor</code> class implements a script processor for
* performing glyph substitution and positioning operations on content associated with the Indic script.</p>
*
* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
*/
public class IndicScriptProcessor extends DefaultScriptProcessor {
/** logging instance */
private static final Log log = LogFactory.getLog(IndicScriptProcessor.class);
/** required features to use for substitutions (returned by getSubstitutionFeatures) */
private static final String[] GSUB_REQ_FEATURES =
{
"abvf", // above base forms
"abvs", // above base substitutions
"akhn", // akhand
"blwf", // below base forms
"blws", // below base substitutions
"ccmp", // glyph composition/decomposition
"cjct", // conjunct forms
"clig", // contextual ligatures
"half", // half forms
"haln", // halant forms
"locl", // localized forms
"nukt", // nukta forms
"pref", // pre-base forms
"pres", // pre-base substitutions
"pstf", // post-base forms
"psts", // post-base substitutions
"rkrf", // rakar forms
"rphf", // reph form
"vatu" // vattu variants
};
/** optional features to use for substitutions (returned by getOptionalSubstitutionFeatures) */
private static final String[] GSUB_OPT_FEATURES =
{
"afrc", // alternative fractions
"calt", // contextual alternatives
"dlig" // discretionary ligatures
};
/** required features to use for positioning (returned by getPositioningFeatures) */
private static final String[] GPOS_REQ_FEATURES =
{
"abvm", // above base marks
"blwm", // below base marks
"dist", // distance (adjustment)
"kern" // kerning
};
/** optional features to use for positioning (currently empty) */
private static final String[] GPOS_OPT_FEATURES =
{
};
/**
 * Script context tester used for glyph substitution; resolves a feature tag to a glyph
 * context tester by looking it up in a class-wide map.
 */
private static class SubstitutionScriptContextTester implements ScriptContextTester {
    /** glyph context testers keyed by feature tag */
    private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
    /**
     * Look up the glyph context tester registered for a feature.
     * @param feature feature tag
     * @return the tester mapped to FEATURE, or null if the map has no entry for it
     */
    public GlyphContextTester getTester(String feature) {
        GlyphContextTester tester = testerMap.get(feature);
        return tester;
    }
}
/**
 * Script context tester used for glyph positioning; resolves a feature tag to a glyph
 * context tester by looking it up in a class-wide map.
 */
private static class PositioningScriptContextTester implements ScriptContextTester {
    /** glyph context testers keyed by feature tag */
    private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
    /**
     * Look up the glyph context tester registered for a feature.
     * @param feature feature tag
     * @return the tester mapped to FEATURE, or null if the map has no entry for it
     */
    public GlyphContextTester getTester(String feature) {
        GlyphContextTester tester = testerMap.get(feature);
        return tester;
    }
}
/**
 * Make script specific flavor of Indic script processor. Devanagari, Gujarati and Gurmukhi
 * script codes (both variants of each) map to their dedicated processors; every other
 * script code yields a plain <code>IndicScriptProcessor</code>.
 * @param script tag
 * @return script processor instance
 */
public static ScriptProcessor makeProcessor(String script) {
    final int scriptCode = CharScript.scriptCodeFromTag(script);
    final ScriptProcessor processor;
    switch (scriptCode) {
    case CharScript.SCRIPT_DEVANAGARI:
    case CharScript.SCRIPT_DEVANAGARI_2:
        processor = new DevanagariScriptProcessor(script);
        break;
    case CharScript.SCRIPT_GUJARATI:
    case CharScript.SCRIPT_GUJARATI_2:
        processor = new GujaratiScriptProcessor(script);
        break;
    case CharScript.SCRIPT_GURMUKHI:
    case CharScript.SCRIPT_GURMUKHI_2:
        processor = new GurmukhiScriptProcessor(script);
        break;
    // [TBD] implement other script processors
    default:
        processor = new IndicScriptProcessor(script);
        break;
    }
    return processor;
}
/** context tester returned by getSubstitutionContextTester */
private final ScriptContextTester subContextTester = new SubstitutionScriptContextTester();
/** context tester returned by getPositioningContextTester */
private final ScriptContextTester posContextTester = new PositioningScriptContextTester();
/**
 * Construct an Indic script processor for a script; each instance owns its own
 * substitution and positioning context testers.
 * @param script tag
 */
IndicScriptProcessor(String script) {
    super(script);
}
/**
* {@inheritDoc}
* <p>This implementation returns the GSUB_REQ_FEATURES array itself (no copy is made).</p>
*/
public String[] getSubstitutionFeatures() {
return GSUB_REQ_FEATURES;
}
/**
* {@inheritDoc}
* <p>This implementation returns the GSUB_OPT_FEATURES array itself (no copy is made).</p>
*/
public String[] getOptionalSubstitutionFeatures() {
return GSUB_OPT_FEATURES;
}
/**
* {@inheritDoc}
* <p>This implementation returns the tester instance created when this processor was constructed.</p>
*/
public ScriptContextTester getSubstitutionContextTester() {
return subContextTester;
}
/**
* {@inheritDoc}
* <p>This implementation returns the GPOS_REQ_FEATURES array itself (no copy is made).</p>
*/
public String[] getPositioningFeatures() {
return GPOS_REQ_FEATURES;
}
/**
* {@inheritDoc}
* <p>This implementation returns the GPOS_OPT_FEATURES array itself, which is currently empty.</p>
*/
public String[] getOptionalPositioningFeatures() {
return GPOS_OPT_FEATURES;
}
/**
* {@inheritDoc}
* <p>This implementation returns the tester instance created when this processor was constructed.</p>
*/
public ScriptContextTester getPositioningContextTester() {
return posContextTester;
}
/**
 * {@inheritDoc}
 * <p>The input is split into syllables; each syllable receives the basic shaping
 * substitutions, then pre-base matra and reph reordering, then the presentation
 * substitutions, after which the syllables are joined back together.</p>
 */
@Override
public GlyphSequence substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct) {
    assert usa != null;
    // 1. syllabize
    GlyphSequence[] syllables = syllabize(gs, script, language);
    // 2. process each syllable
    for (int k = 0; k < syllables.length; k++) {
        GlyphSequence syllable = syllables[k];
        // apply basic shaping subs, in use spec order
        for (GlyphTable.UseSpec use : usa) {
            if (isBasicShapingUse(use)) {
                syllable.setPredications(true);
                syllable = use.substitute(syllable, script, language, sct);
            }
        }
        // reorder pre-base matra first, then reph
        syllable = reorderPreBaseMatra(syllable);
        syllable = reorderReph(syllable);
        // apply presentation subs, in use spec order
        for (GlyphTable.UseSpec use : usa) {
            if (isPresentationUse(use)) {
                syllable.setPredications(true);
                syllable = use.substitute(syllable, script, language, sct);
            }
        }
        // record result
        syllables[k] = syllable;
    }
    // 3. return reassembled substituted syllables
    return unsyllabize(gs, syllables);
}
/**
* Get script specific syllabizer class.
* <p>This base implementation returns null; the returned value is passed unchanged to
* <code>Syllabizer.getSyllabizer</code> whenever a glyph sequence is syllabized.</p>
* @return a syllabizer class object or null
*/
protected Class<? extends Syllabizer> getSyllabizerClass() {
return null;
}
/**
 * Split a glyph sequence into syllabic segments using the syllabizer obtained for the
 * given script and language (and this processor's syllabizer class).
 * @param gs input glyph sequence
 * @param script tag
 * @param language tag
 * @return segmented glyph sequences
 */
private GlyphSequence[] syllabize(GlyphSequence gs, String script, String language) {
    Syllabizer syllabizer = Syllabizer.getSyllabizer(script, language, getSyllabizerClass());
    return syllabizer.syllabize(gs);
}
/**
 * Reassemble processed syllables into a single glyph sequence by delegating to
 * <code>GlyphSequence.join</code>.
 * @param gs original (unsegmented) glyph sequence
 * @param sa processed syllable sequences
 * @return joined glyph sequence
 */
private GlyphSequence unsyllabize(GlyphSequence gs, GlyphSequence[] sa) {
    GlyphSequence joined = GlyphSequence.join(gs, sa);
    return joined;
}
/** feature tags treated as basic shaping features by isBasicShapingUse */
private static final String[] BASIC_SHAPING_FEATURE_STRINGS = {
    "abvf",
    "akhn",
    "blwf",
    "cjct",
    "half",
    "locl",
    "nukt",
    "pref",
    "pstf",
    "rkrf",
    "rphf",
    "vatu",
};
/** set holding the entries of BASIC_SHAPING_FEATURE_STRINGS, for membership tests */
private static Set<String> basicShapingFeatures;
static {
    Set<String> features = new HashSet<String>();
    for (int i = 0; i < BASIC_SHAPING_FEATURE_STRINGS.length; i++) {
        features.add(BASIC_SHAPING_FEATURE_STRINGS[i]);
    }
    basicShapingFeatures = features;
}
/**
 * Determine if a use specification refers to a basic shaping feature.
 * @param us use specification (must not be null)
 * @return true if the feature of US is a member of the basic shaping feature set
 */
private boolean isBasicShapingUse(GlyphTable.UseSpec us) {
    assert us != null;
    return (basicShapingFeatures != null) && basicShapingFeatures.contains(us.getFeature());
}
/** feature tags treated as presentation features by isPresentationUse */
private static final String[] PRESENTATION_FEATURE_STRINGS = {
    "abvs",
    "blws",
    "calt",
    "haln",
    "pres",
    "psts",
};
/** set holding the entries of PRESENTATION_FEATURE_STRINGS, for membership tests */
private static Set<String> presentationFeatures;
static {
    Set<String> features = new HashSet<String>();
    for (int i = 0; i < PRESENTATION_FEATURE_STRINGS.length; i++) {
        features.add(PRESENTATION_FEATURE_STRINGS[i]);
    }
    presentationFeatures = features;
}
/**
 * Determine if a use specification refers to a presentation feature.
 * @param us use specification (must not be null)
 * @return true if the feature of US is a member of the presentation feature set
 */
private boolean isPresentationUse(GlyphTable.UseSpec us) {
    assert us != null;
    return (presentationFeatures != null) && presentationFeatures.contains(us.getFeature());
}
/**
 * Move a pre-base matra, if one is found, to its target position.
 * @param gs input sequence
 * @return reordered sequence, or GS itself if no matra or no target is found, or if the
 * matra is already at its target
 */
private GlyphSequence reorderPreBaseMatra(GlyphSequence gs) {
    int matra = findPreBaseMatra(gs);
    if (matra < 0) {
        return gs;
    }
    int destination = findPreBaseMatraTarget(gs, matra);
    if ((destination < 0) || (destination == matra)) {
        return gs;
    }
    return reorder(gs, matra, destination);
}
/**
* Find pre-base matra in sequence.
* <p>This base implementation always returns -1, in which case no pre-base matra
* reordering is performed.</p>
* @param gs input sequence
* @return index of pre-base matra or -1 if not found
*/
protected int findPreBaseMatra(GlyphSequence gs) {
return -1;
}
/**
* Find pre-base matra target in sequence.
* <p>This base implementation always returns -1, in which case no pre-base matra
* reordering is performed.</p>
* @param gs input sequence
* @param source index of pre-base matra
* @return index of pre-base matra target or -1
*/
protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
return -1;
}
/**
 * Move a reph, if one is found, to its target position.
 * @param gs input sequence
 * @return reordered sequence, or GS itself if no reph or no target is found, or if the
 * reph is already at its target
 */
private GlyphSequence reorderReph(GlyphSequence gs) {
    int reph = findReph(gs);
    if (reph < 0) {
        return gs;
    }
    int destination = findRephTarget(gs, reph);
    if ((destination < 0) || (destination == reph)) {
        return gs;
    }
    return reorder(gs, reph, destination);
}
/**
* Find reph in sequence.
* <p>This base implementation always returns -1, in which case no reph reordering
* is performed.</p>
* @param gs input sequence
* @return index of reph or -1 if not found
*/
protected int findReph(GlyphSequence gs) {
return -1;
}
/**
* Find reph target in sequence.
* <p>This base implementation always returns -1, in which case no reph reordering
* is performed.</p>
* @param gs input sequence
* @param source index of reph
* @return index of reph target or -1
*/
protected int findRephTarget(GlyphSequence gs, int source) {
return -1;
}
/**
 * Reorder a glyph sequence by delegating to <code>GlyphSequence.reorder</code>.
 * @param gs input sequence
 * @param source index to move from
 * @param target index to move to
 * @return reordered sequence
 */
private GlyphSequence reorder(GlyphSequence gs, int source, int target) {
    // NOTE(review): third argument is presumably the number of glyphs to move - confirm against GlyphSequence.reorder
    final int count = 1;
    return GlyphSequence.reorder(gs, source, count, target);
}
/**
 * {@inheritDoc}
 * <p>This implementation adds nothing of its own; it returns the result of the
 * superclass implementation.</p>
 */
@Override
public boolean position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct) {
    return super.position(gs, script, language, fontSize, usa, widths, adjustments, sct);
}
/**
 * Abstract syllabizer. A syllabizer is identified by its script and language, which
 * determine equality, hash code and ordering. Instances are created on demand and
 * cached per script/language pair by {@link #getSyllabizer}.
 */
protected abstract static class Syllabizer implements Comparable {
    private final String script;
    private final String language;
    Syllabizer(String script, String language) {
        this.script = script;
        this.language = language;
    }
    /**
     * Subdivide glyph sequence GS into syllabic segments each represented by a distinct
     * output glyph sequence.
     * @param gs input glyph sequence
     * @return segmented syllabic glyph sequences
     */
    abstract GlyphSequence[] syllabize(GlyphSequence gs);
    /** {@inheritDoc} */
    @Override
    public int hashCode() {
        int hc = 0;
        hc = 7 * hc + (hc ^ script.hashCode());
        hc = 11 * hc + (hc ^ language.hashCode());
        return hc;
    }
    /** {@inheritDoc} */
    @Override
    public boolean equals(Object o) {
        if (o instanceof Syllabizer) {
            Syllabizer s = (Syllabizer) o;
            return s.script.equals(script) && s.language.equals(language);
        } else {
            return false;
        }
    }
    /**
     * Order syllabizers by script, then by language.
     * @param o object to compare with
     * @return comparison result; -1 if O is not a syllabizer
     */
    public int compareTo(Object o) {
        int d;
        if (o instanceof Syllabizer) {
            Syllabizer s = (Syllabizer) o;
            if ((d = script.compareTo(s.script)) == 0) {
                d = language.compareTo(s.language);
            }
        } else {
            d = -1;
        }
        return d;
    }
    /**
     * Cache of syllabizers keyed by the identifier produced by makeSyllabizerId.
     * NOTE(review): this is a plain HashMap without synchronization - confirm that
     * callers do not syllabize concurrently.
     */
    private static final Map<String, Syllabizer> syllabizers = new HashMap<String, Syllabizer>();
    /**
     * Obtain the (cached) syllabizer for a script and language, creating it on first use.
     * If no instance of SYLLABIZERCLASS can be created (including when it is null), a
     * <code>DefaultSyllabizer</code> is used instead.
     * @param script tag
     * @param language tag
     * @param syllabizerClass script specific syllabizer class, or null to use the default
     * @return a syllabizer (never null)
     */
    static Syllabizer getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
        String sid = makeSyllabizerId(script, language);
        Syllabizer s = syllabizers.get(sid);
        if (s == null) {
            s = makeSyllabizer(script, language, syllabizerClass);
            if (s == null) {
                s = new DefaultSyllabizer(script, language);
            }
            syllabizers.put(sid, s);
        }
        return s;
    }
    /**
     * Construct the cache identifier for a script and language.
     * @param script tag
     * @param language tag
     * @return identifier of the form <code>script:language</code>
     */
    static String makeSyllabizerId(String script, String language) {
        return script + ":" + language;
    }
    /**
     * Reflectively instantiate a syllabizer through its (String, String) constructor.
     * @param script tag
     * @param language tag
     * @param syllabizerClass class to instantiate, may be null
     * @return new syllabizer, or null if SYLLABIZERCLASS is null or cannot be instantiated
     */
    static Syllabizer makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
        // a processor without a script specific syllabizer supplies null here; report
        // "none created" so the caller falls back to the default syllabizer instead of
        // failing with a NullPointerException
        if (syllabizerClass == null) {
            return null;
        }
        Syllabizer s;
        try {
            Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor(new Class[] { String.class, String.class });
            s = cf.newInstance(script, language);
        } catch (NoSuchMethodException e) {
            // deliberately best effort: caller substitutes the default syllabizer
            s = null;
        } catch (InstantiationException e) {
            s = null;
        } catch (IllegalAccessException e) {
            s = null;
        } catch (InvocationTargetException e) {
            s = null;
        }
        return s;
    }
}
/**
 * Default syllabizer. Segments the character array of a glyph sequence using the
 * overridable findStartOfSyllable and findEndOfSyllable hooks, then distributes the
 * glyphs over those segments according to their character associations. With the hook
 * implementations of this class no syllable is ever found, so the whole input forms a
 * single non-syllable segment.
 */
protected static class DefaultSyllabizer extends Syllabizer {
    DefaultSyllabizer(String script, String language) {
        super(script, language);
    }
    /** {@inheritDoc} */
    @Override
    GlyphSequence[] syllabize(GlyphSequence gs) {
        int[] ca = gs.getCharacterArray(false);
        int nc = gs.getCharacterCount();
        if (nc == 0) {
            // nothing to segment: the input is its own single segment
            return new GlyphSequence[] { gs };
        } else {
            return segmentize(gs, segmentize(ca, nc));
        }
    }
    /**
     * Construct array of segments from original character array (associated with original glyph sequence)
     * @param ca input character sequence
     * @param nc number of characters in sequence
     * @return array of syllable segments
     */
    protected Segment[] segmentize(int[] ca, int nc) {
        Vector<Segment> sv = new Vector<Segment>(nc);
        for (int s = 0, e = nc; s < e; ) {
            int i = findStartOfSyllable(ca, s, e);
            if (i > s) {
                // from s to i is non-syllable segment
                sv.add(new Segment(s, i, Segment.OTHER));
                s = i; // move s to start of syllable
                if (s >= e) {
                    // no syllable starts before e: input is exhausted, so do not
                    // search for an end or record an empty trailing segment
                    break;
                }
            }
            i = findEndOfSyllable(ca, s, e);
            if (i > s) {
                // from s to i is syllable segment
                sv.add(new Segment(s, i, Segment.SYLLABLE));
                s = i; // move s to end of syllable
            } else {
                // from s to e is non-syllable segment
                sv.add(new Segment(s, e, Segment.OTHER));
                s = e; // move s to end of input sequence
            }
        }
        return sv.toArray(new Segment [ sv.size() ]);
    }
    /**
     * Construct array of glyph sequences from original glyph sequence and segment array.
     * Segments that contain no glyph produce no output sequence.
     * @param gs original input glyph sequence
     * @param sa segment array
     * @return array of glyph sequences each belonging to an (ordered) segment in SA, or
     * an array containing only GS if no segment contains any glyph
     */
    protected GlyphSequence[] segmentize(GlyphSequence gs, Segment[] sa) {
        int ng = gs.getGlyphCount();
        int[] ga = gs.getGlyphArray(false);
        GlyphSequence.CharAssociation[] aa = gs.getAssociations(0, -1);
        Vector<GlyphSequence> nsv = new Vector<GlyphSequence>();
        for (int i = 0, ns = sa.length; i < ns; i++) {
            Segment s = sa [ i ];
            Vector<Integer> ngv = new Vector<Integer>(ng);
            Vector<GlyphSequence.CharAssociation> nav = new Vector<GlyphSequence.CharAssociation>(ng);
            // collect the glyphs whose character association lies within this segment
            for (int j = 0; j < ng; j++) {
                GlyphSequence.CharAssociation ca = aa [ j ];
                if (ca.contained(s.getOffset(), s.getCount())) {
                    ngv.add(ga [ j ]);
                    nav.add(ca);
                }
            }
            if (ngv.size() > 0) {
                nsv.add(new GlyphSequence(gs, null, toIntArray(ngv), null, null, nav.toArray(new GlyphSequence.CharAssociation [ nav.size() ]), null));
            }
        }
        if (nsv.size() > 0) {
            return nsv.toArray(new GlyphSequence [ nsv.size() ]);
        } else {
            return new GlyphSequence[] { gs };
        }
    }
    /**
     * Find start of syllable in character array, starting at S, ending at E.
     * This implementation never finds a start.
     * @param ca character array
     * @param s start index
     * @param e end index
     * @return index of start or E if no start found
     */
    protected int findStartOfSyllable(int[] ca, int s, int e) {
        return e;
    }
    /**
     * Find end of syllable in character array, starting at S, ending at E.
     * This implementation never finds an end.
     * @param ca character array
     * @param s start index
     * @param e end index
     * @return index of end or S if no end found
     */
    protected int findEndOfSyllable(int[] ca, int s, int e) {
        return s;
    }
    /**
     * Copy a vector of integers into a primitive array.
     * @param iv vector of (non-null) integers
     * @return array holding the values of IV in the same order
     */
    private static int[] toIntArray(Vector<Integer> iv) {
        int ni = iv.size();
        int[] ia = new int [ ni ];
        for (int i = 0; i < ni; i++) {
            ia [ i ] = iv.get(i).intValue();
        }
        return ia;
    }
}
/**
 * Syllabic segment: a range of indices from start to end together with a type
 * that marks it as either a syllable or other content.
 */
protected static class Segment {
    /** segment type: other (non-syllable) characters */
    static final int OTHER = 0;
    /** segment type: (orthographic) syllable */
    static final int SYLLABLE = 1;
    private final int start;
    private final int end;
    private final int type;
    /**
     * Construct a segment.
     * @param start index at which the segment starts
     * @param end index at which the segment ends
     * @param type segment type, OTHER or SYLLABLE
     */
    Segment(int start, int end, int type) {
        this.start = start;
        this.end = end;
        this.type = type;
    }
    /** @return start index */
    int getStart() {
        return this.start;
    }
    /** @return end index */
    int getEnd() {
        return this.end;
    }
    /** @return offset of segment, which is the same as its start index */
    int getOffset() {
        return getStart();
    }
    /** @return difference between end and start index */
    int getCount() {
        return getEnd() - getStart();
    }
    /** @return segment type */
    int getType() {
        return this.type;
    }
}
}