blob: 5e68c8763f5c197d6d8f1608a7502ecfbab8a1f7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.scripts;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fop.complexscripts.bidi.BidiClass;
import org.apache.fop.complexscripts.bidi.BidiConstants;
import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable;
import org.apache.fop.complexscripts.util.GlyphContextTester;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;
// CSOFF: LineLengthCheck
/**
* <p>The <code>ArabicScriptProcessor</code> class implements a script processor for
* performing glyph substitution and positioning operations on content associated with the Arabic script.</p>
*
* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
*/
public class ArabicScriptProcessor extends DefaultScriptProcessor {
/** logging instance */
private static final Log log = LogFactory.getLog(ArabicScriptProcessor.class);
/** features to use for substitutions */
private static final String[] GSUB_FEATURES =
{
"calt", // contextual alternates
"ccmp", // glyph composition/decomposition
"fina", // final (terminal) forms
"init", // initial forms
"isol", // isolated formas
"liga", // standard ligatures
"medi", // medial forms
"rlig" // required ligatures
};
/** features to use for positioning */
private static final String[] GPOS_FEATURES =
{
"curs", // cursive positioning
"kern", // kerning
"mark", // mark to base or ligature positioning
"mkmk" // mark to mark positioning
};
private static class SubstitutionScriptContextTester implements ScriptContextTester {
private static Map/*<String,GlyphContextTester>*/ testerMap = new HashMap/*<String,GlyphContextTester>*/();
static {
testerMap.put("fina", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inFinalContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("init", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inInitialContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("isol", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inIsolateContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("liga", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inLigatureContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("medi", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inMedialContext(script, language, feature, gs, index, flags);
}
});
}
public GlyphContextTester getTester(String feature) {
return (GlyphContextTester) testerMap.get(feature);
}
}
private static class PositioningScriptContextTester implements ScriptContextTester {
private static Map/*<String,GlyphContextTester>*/ testerMap = new HashMap/*<String,GlyphContextTester>*/();
public GlyphContextTester getTester(String feature) {
return (GlyphContextTester) testerMap.get(feature);
}
}
private final ScriptContextTester subContextTester;
private final ScriptContextTester posContextTester;
ArabicScriptProcessor(String script) {
super(script);
this.subContextTester = new SubstitutionScriptContextTester();
this.posContextTester = new PositioningScriptContextTester();
}
/** {@inheritDoc} */
public String[] getSubstitutionFeatures() {
return GSUB_FEATURES;
}
/** {@inheritDoc} */
public ScriptContextTester getSubstitutionContextTester() {
return subContextTester;
}
/** {@inheritDoc} */
public String[] getPositioningFeatures() {
return GPOS_FEATURES;
}
/** {@inheritDoc} */
public ScriptContextTester getPositioningContextTester() {
return posContextTester;
}
/** {@inheritDoc} */
@Override
public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language) {
// a side effect of BIDI reordering is to order combining marks before their base, so we need to override the default here to
// prevent double reordering
return gs;
}
private static boolean inFinalContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
GlyphSequence.CharAssociation a = gs.getAssociation(index);
int[] ca = gs.getCharacterArray(false);
int nc = gs.getCharacterCount();
if (nc == 0) {
return false;
} else {
int s = a.getStart();
int e = a.getEnd();
if (!hasFinalPrecedingContext(ca, nc, s, e)) {
return false;
} else if (forcesFinalThisContext(ca, nc, s, e)) {
return true;
} else if (!hasFinalFollowingContext(ca, nc, s, e)) {
return false;
} else {
return true;
}
}
}
private static boolean inInitialContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
GlyphSequence.CharAssociation a = gs.getAssociation(index);
int[] ca = gs.getCharacterArray(false);
int nc = gs.getCharacterCount();
if (nc == 0) {
return false;
} else {
int s = a.getStart();
int e = a.getEnd();
if (!hasInitialPrecedingContext(ca, nc, s, e)) {
return false;
} else if (!hasInitialFollowingContext(ca, nc, s, e)) {
return false;
} else {
return true;
}
}
}
private static boolean inIsolateContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
GlyphSequence.CharAssociation a = gs.getAssociation(index);
int nc = gs.getCharacterCount();
if (nc == 0) {
return false;
} else if ((a.getStart() == 0) && (a.getEnd() == nc)) {
return true;
} else {
return false;
}
}
private static boolean inLigatureContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
GlyphSequence.CharAssociation a = gs.getAssociation(index);
int[] ca = gs.getCharacterArray(false);
int nc = gs.getCharacterCount();
if (nc == 0) {
return false;
} else {
int s = a.getStart();
int e = a.getEnd();
if (!hasLigaturePrecedingContext(ca, nc, s, e)) {
return false;
} else if (!hasLigatureFollowingContext(ca, nc, s, e)) {
return false;
} else {
return true;
}
}
}
private static boolean inMedialContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
GlyphSequence.CharAssociation a = gs.getAssociation(index);
int[] ca = gs.getCharacterArray(false);
int nc = gs.getCharacterCount();
if (nc == 0) {
return false;
} else {
int s = a.getStart();
int e = a.getEnd();
if (!hasMedialPrecedingContext(ca, nc, s, e)) {
return false;
} else if (!hasMedialThisContext(ca, nc, s, e)) {
return false;
} else if (!hasMedialFollowingContext(ca, nc, s, e)) {
return false;
} else {
return true;
}
}
}
private static boolean hasFinalPrecedingContext(int[] ca, int nc, int s, int e) {
int chp = 0;
int clp = 0;
for (int i = s; i > 0; i--) {
int k = i - 1;
if ((k >= 0) && (k < nc)) {
chp = ca [ k ];
clp = BidiClass.getBidiClass(chp);
if (clp != BidiConstants.NSM) {
break;
}
}
}
if (clp != BidiConstants.AL) {
return false;
} else if (hasIsolateInitial(chp)) {
return false;
} else {
return true;
}
}
private static boolean forcesFinalThisContext(int[] ca, int nc, int s, int e) {
int chl = 0;
int cll = 0;
for (int i = 0, n = e - s; i < n; i++) {
int k = n - i - 1;
int j = s + k;
if ((j >= 0) && (j < nc)) {
chl = ca [ j ];
cll = BidiClass.getBidiClass(chl);
if (cll != BidiConstants.NSM) {
break;
}
}
}
if (cll != BidiConstants.AL) {
return false;
}
if (hasIsolateInitial(chl)) {
return true;
} else {
return false;
}
}
private static boolean hasFinalFollowingContext(int[] ca, int nc, int s, int e) {
int chf = 0;
int clf = 0;
for (int i = e, n = nc; i < n; i++) {
chf = ca [ i ];
clf = BidiClass.getBidiClass(chf);
if (clf != BidiConstants.NSM) {
break;
}
}
if (clf != BidiConstants.AL) {
return true;
} else if (hasIsolateFinal(chf)) {
return true;
} else {
return false;
}
}
private static boolean hasInitialPrecedingContext(int[] ca, int nc, int s, int e) {
int chp = 0;
int clp = 0;
for (int i = s; i > 0; i--) {
int k = i - 1;
if ((k >= 0) && (k < nc)) {
chp = ca [ k ];
clp = BidiClass.getBidiClass(chp);
if (clp != BidiConstants.NSM) {
break;
}
}
}
if (clp != BidiConstants.AL) {
return true;
} else if (hasIsolateInitial(chp)) {
return true;
} else {
return false;
}
}
private static boolean hasInitialFollowingContext(int[] ca, int nc, int s, int e) {
int chf = 0;
int clf = 0;
for (int i = e, n = nc; i < n; i++) {
chf = ca [ i ];
clf = BidiClass.getBidiClass(chf);
if (clf != BidiConstants.NSM) {
break;
}
}
if (clf != BidiConstants.AL) {
return false;
} else if (hasIsolateFinal(chf)) {
return false;
} else {
return true;
}
}
private static boolean hasMedialPrecedingContext(int[] ca, int nc, int s, int e) {
int chp = 0;
int clp = 0;
for (int i = s; i > 0; i--) {
int k = i - 1;
if ((k >= 0) && (k < nc)) {
chp = ca [ k ];
clp = BidiClass.getBidiClass(chp);
if (clp != BidiConstants.NSM) {
break;
}
}
}
if (clp != BidiConstants.AL) {
return false;
} else if (hasIsolateInitial(chp)) {
return false;
} else {
return true;
}
}
private static boolean hasMedialThisContext(int[] ca, int nc, int s, int e) {
int chf = 0; // first non-NSM char in [s,e)
int clf = 0;
for (int i = 0, n = e - s; i < n; i++) {
int k = s + i;
if ((k >= 0) && (k < nc)) {
chf = ca [ s + i ];
clf = BidiClass.getBidiClass(chf);
if (clf != BidiConstants.NSM) {
break;
}
}
}
if (clf != BidiConstants.AL) {
return false;
}
int chl = 0; // last non-NSM char in [s,e)
int cll = 0;
for (int i = 0, n = e - s; i < n; i++) {
int k = n - i - 1;
int j = s + k;
if ((j >= 0) && (j < nc)) {
chl = ca [ j ];
cll = BidiClass.getBidiClass(chl);
if (cll != BidiConstants.NSM) {
break;
}
}
}
if (cll != BidiConstants.AL) {
return false;
}
if (hasIsolateFinal(chf)) {
return false;
} else if (hasIsolateInitial(chl)) {
return false;
} else {
return true;
}
}
private static boolean hasMedialFollowingContext(int[] ca, int nc, int s, int e) {
int chf = 0;
int clf = 0;
for (int i = e, n = nc; i < n; i++) {
chf = ca [ i ];
clf = BidiClass.getBidiClass(chf);
if (clf != BidiConstants.NSM) {
break;
}
}
if (clf != BidiConstants.AL) {
return false;
} else if (hasIsolateFinal(chf)) {
return false;
} else {
return true;
}
}
private static boolean hasLigaturePrecedingContext(int[] ca, int nc, int s, int e) {
return true;
}
private static boolean hasLigatureFollowingContext(int[] ca, int nc, int s, int e) {
int chf = 0;
int clf = 0;
for (int i = e, n = nc; i < n; i++) {
chf = ca [ i ];
clf = BidiClass.getBidiClass(chf);
if (clf != BidiConstants.NSM) {
break;
}
}
if (clf == BidiConstants.AL) {
return true;
} else {
return false;
}
}
/**
* Ordered array of Unicode scalars designating those Arabic (Script) Letters
* which exhibit an isolated form in word initial position.
*/
private static int[] isolatedInitials = {
0x0621, // HAMZA
0x0622, // ALEF WITH MADDA ABOVE
0x0623, // ALEF WITH HAMZA ABOVE
0x0624, // WAW WITH HAMZA ABOVE
0x0625, // ALEF WITH HAMZA BELOWW
0x0627, // ALEF
0x062F, // DAL
0x0630, // THAL
0x0631, // REH
0x0632, // ZAIN
0x0648, // WAW
0x0671, // ALEF WASLA
0x0672, // ALEF WITH WAVY HAMZA ABOVE
0x0673, // ALEF WITH WAVY HAMZA BELOW
0x0675, // HIGH HAMZA ALEF
0x0676, // HIGH HAMZA WAW
0x0677, // U WITH HAMZA ABOVE
0x0688, // DDAL
0x0689, // DAL WITH RING
0x068A, // DAL WITH DOT BELOW
0x068B, // DAL WITH DOT BELOW AND SMALL TAH
0x068C, // DAHAL
0x068D, // DDAHAL
0x068E, // DUL
0x068F, // DUL WITH THREE DOTS ABOVE DOWNWARDS
0x0690, // DUL WITH FOUR DOTS ABOVE
0x0691, // RREH
0x0692, // REH WITH SMALL V
0x0693, // REH WITH RING
0x0694, // REH WITH DOT BELOW
0x0695, // REH WITH SMALL V BELOW
0x0696, // REH WITH DOT BELOW AND DOT ABOVE
0x0697, // REH WITH TWO DOTS ABOVE
0x0698, // JEH
0x0699, // REH WITH FOUR DOTS ABOVE
0x06C4, // WAW WITH RING
0x06C5, // KIRGHIZ OE
0x06C6, // OE
0x06C7, // U
0x06C8, // YU
0x06C9, // KIRGHIZ YU
0x06CA, // WAW WITH TWO DOTS ABOVE
0x06CB, // VE
0x06CF, // WAW WITH DOT ABOVE
0x06EE, // DAL WITH INVERTED V
0x06EF // REH WITH INVERTED V
};
private static boolean hasIsolateInitial(int ch) {
return Arrays.binarySearch(isolatedInitials, ch) >= 0;
}
/**
* Ordered array of Unicode scalars designating those Arabic (Script) Letters
* which exhibit an isolated form in word final position.
*/
private static int[] isolatedFinals = {
0x0621 // HAMZA
};
private static boolean hasIsolateFinal(int ch) {
return Arrays.binarySearch(isolatedFinals, ch) >= 0;
}
}