/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */
| |
| package org.apache.fop.text.bidi; |
| |
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Arrays;
import java.util.Iterator;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.fop.util.BidiConstants;
import org.apache.fop.util.License;
| |
| // CSOFF: LineLength |
| // CSOFF: NoWhitespaceAfter |
| |
| /** |
| * <p>Utility for generating a Java class representing bidirectional |
| * class properties from the Unicode property files.</p> |
| * |
| * <p>This code is derived in part from GenerateLineBreakUtils.java.</p> |
| * |
| * @author Glenn Adams |
| */ |
| public final class GenerateBidiClassUtils { |
| |
| private GenerateBidiClassUtils() { |
| } |
| |
| private static byte[] bcL1 = new byte[256]; // ascii and basic latin blocks ( 0x0000 - 0x00FF ) |
| private static byte[] bcR1 = new byte[368]; // hebrew and arabic blocks ( 0x0590 - 0x06FF ) |
| private static int[] bcS1; // interval start indices |
| private static int[] bcE1; // interval end indices |
| private static byte[] bcC1; // interval bid classes |
| |
| /** |
| * Generate a class managing bidi class properties for Unicode characters. |
| * |
| * @param bidiFileName name (as URL) of file containing bidi type data |
| * @param outFileName name of the output file |
| * @throws Exception |
| */ |
| private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception { |
| |
| readBidiClassProperties(bidiFileName); |
| |
| // generate class |
| PrintWriter out = new PrintWriter(new FileWriter(outFileName)); |
| License.writeJavaLicenseId(out); |
| out.println(); |
| out.println("package org.apache.fop.text.bidi;"); |
| out.println(); |
| out.println("import java.util.Arrays;"); |
| out.println("import org.apache.fop.util.BidiConstants;"); |
| out.println(); |
| out.println("// CSOFF: WhitespaceAfterCheck"); |
| out.println("// CSOFF: LineLengthCheck"); |
| out.println(); |
| out.println("/*"); |
| out.println(" * !!! THIS IS A GENERATED FILE !!!"); |
| out.println(" * If updates to the source are needed, then:"); |
| out.println(" * - apply the necessary modifications to"); |
| out.println(" * 'src/codegen/unicode/java/org/apache/fop/text/bidi/GenerateBidiClassUtils.java'"); |
| out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClassUtils.java"); |
| out.println(" * in 'src/java/org/apache/fop/text/bidi'"); |
| out.println(" * - commit BOTH changed files"); |
| out.println(" */"); |
| out.println(); |
| out.println("/** Bidirectional class utilities. */"); |
| out.println("public final class BidiClassUtils {"); |
| out.println(); |
| out.println("private BidiClassUtils() {"); |
| out.println("}"); |
| out.println(); |
| dumpData(out); |
| out.println ("/**"); |
| out.println (" * Lookup bidi class for character expressed as unicode scalar value."); |
| out.println (" * @param ch a unicode scalar value"); |
| out.println (" * @return bidi class"); |
| out.println (" */"); |
| out.println("public static int getBidiClass ( int ch ) {"); |
| out.println(" if ( ch <= 0x00FF ) {"); |
| out.println(" return bcL1 [ ch - 0x0000 ];"); |
| out.println(" } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {"); |
| out.println(" return bcR1 [ ch - 0x0590 ];"); |
| out.println(" } else {"); |
| out.println(" return getBidiClass ( ch, bcS1, bcE1, bcC1 );"); |
| out.println(" }"); |
| out.println("}"); |
| out.println(); |
| out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {"); |
| out.println(" int k = Arrays.binarySearch ( sa, ch );"); |
| out.println(" if ( k >= 0 ) {"); |
| out.println(" return ca [ k ];"); |
| out.println(" } else {"); |
| out.println(" k = - ( k + 1 );"); |
| out.println(" if ( k == 0 ) {"); |
| out.println(" return BidiConstants.L;"); |
| out.println(" } else if ( ch <= ea [ k - 1 ] ) {"); |
| out.println(" return ca [ k - 1 ];"); |
| out.println(" } else {"); |
| out.println(" return BidiConstants.L;"); |
| out.println(" }"); |
| out.println(" }"); |
| out.println("}"); |
| out.println(); |
| out.println("}"); |
| out.flush(); |
| out.close(); |
| } |
| |
| /** |
| * Read bidi class property data. |
| * |
| * @param bidiFileName name (as URL) of bidi type data |
| */ |
| private static void readBidiClassProperties(String bidiFileName) throws Exception { |
| // read property names |
| BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream())); |
| String line; |
| int lineNumber = 0; |
| TreeSet intervals = new TreeSet(); |
| while ( ( line = b.readLine() ) != null ) { |
| lineNumber++; |
| if ( line.startsWith("#") ) { |
| continue; |
| } else if ( line.length() == 0 ) { |
| continue; |
| } else { |
| if ( line.indexOf ( "#" ) != -1 ) { |
| line = ( line.split ( "#" ) ) [ 0 ]; |
| } |
| String[] fa = line.split ( ";" ); |
| if ( fa.length == 2 ) { |
| int[] interval = parseInterval ( fa[0].trim() ); |
| byte bidiClass = (byte) parseBidiClass ( fa[1].trim() ); |
| if ( interval[1] == interval[0] ) { // singleton |
| int c = interval[0]; |
| if ( c <= 0x00FF ) { |
| if ( bcL1 [ c - 0x0000 ] == 0 ) { |
| bcL1 [ c - 0x0000 ] = bidiClass; |
| } else { |
| throw new Exception ( "duplicate singleton entry: " + c ); |
| } |
| } else if ( ( c >= 0x0590 ) && ( c <= 0x06FF ) ) { |
| if ( bcR1 [ c - 0x0590 ] == 0 ) { |
| bcR1 [ c - 0x0590 ] = bidiClass; |
| } else { |
| throw new Exception ( "duplicate singleton entry: " + c ); |
| } |
| } else { |
| addInterval ( intervals, c, c, bidiClass ); |
| } |
| } else { // non-singleton |
| int s = interval[0]; |
| int e = interval[1]; // inclusive |
| if ( s <= 0x00FF ) { |
| for ( int i = s; i <= e; i++ ) { |
| if ( i <= 0x00FF ) { |
| if ( bcL1 [ i - 0x0000 ] == 0 ) { |
| bcL1 [ i - 0x0000 ] = bidiClass; |
| } else { |
| throw new Exception ( "duplicate singleton entry: " + i ); |
| } |
| } else { |
| addInterval ( intervals, i, e, bidiClass ); |
| break; |
| } |
| } |
| } else if ( ( s >= 0x0590 ) && ( s <= 0x06FF ) ) { |
| for ( int i = s; i <= e; i++ ) { |
| if ( i <= 0x06FF ) { |
| if ( bcR1 [ i - 0x0590 ] == 0 ) { |
| bcR1 [ i - 0x0590 ] = bidiClass; |
| } else { |
| throw new Exception ( "duplicate singleton entry: " + i ); |
| } |
| } else { |
| addInterval ( intervals, i, e, bidiClass ); |
| break; |
| } |
| } |
| } else { |
| addInterval ( intervals, s, e, bidiClass ); |
| } |
| } |
| } else { |
| throw new Exception ( "bad syntax, line(" + lineNumber + "): " + line ); |
| } |
| } |
| } |
| // compile interval search data |
| int ivIndex = 0, niv = intervals.size(); |
| bcS1 = new int [ niv ]; |
| bcE1 = new int [ niv ]; |
| bcC1 = new byte [ niv ]; |
| for ( Iterator it = intervals.iterator(); it.hasNext(); ivIndex++ ) { |
| Interval iv = (Interval) it.next(); |
| bcS1[ivIndex] = iv.start; |
| bcE1[ivIndex] = iv.end; |
| bcC1[ivIndex] = (byte) iv.bidiClass; |
| } |
| // test data |
| test(); |
| } |
| |
| private static int[] parseInterval ( String interval ) throws Exception { |
| int s, e; |
| String[] fa = interval.split("\\.\\."); |
| if ( fa.length == 1 ) { |
| s = Integer.parseInt ( fa[0], 16 ); |
| e = s; |
| } else if ( fa.length == 2 ) { |
| s = Integer.parseInt ( fa[0], 16 ); |
| e = Integer.parseInt ( fa[1], 16 ); |
| } else { |
| throw new Exception ( "bad interval syntax: " + interval ); |
| } |
| if ( e < s ) { |
| throw new Exception ( "bad interval, start must be less than or equal to end: " + interval ); |
| } |
| return new int[] {s, e}; |
| } |
| |
| private static int parseBidiClass ( String bidiClass ) { |
| int bc = 0; |
| if ( "L".equals ( bidiClass ) ) { |
| bc = BidiConstants.L; |
| } else if ( "LRE".equals ( bidiClass ) ) { |
| bc = BidiConstants.LRE; |
| } else if ( "LRO".equals ( bidiClass ) ) { |
| bc = BidiConstants.LRO; |
| } else if ( "R".equals ( bidiClass ) ) { |
| bc = BidiConstants.R; |
| } else if ( "AL".equals ( bidiClass ) ) { |
| bc = BidiConstants.AL; |
| } else if ( "RLE".equals ( bidiClass ) ) { |
| bc = BidiConstants.RLE; |
| } else if ( "RLO".equals ( bidiClass ) ) { |
| bc = BidiConstants.RLO; |
| } else if ( "PDF".equals ( bidiClass ) ) { |
| bc = BidiConstants.PDF; |
| } else if ( "EN".equals ( bidiClass ) ) { |
| bc = BidiConstants.EN; |
| } else if ( "ES".equals ( bidiClass ) ) { |
| bc = BidiConstants.ES; |
| } else if ( "ET".equals ( bidiClass ) ) { |
| bc = BidiConstants.ET; |
| } else if ( "AN".equals ( bidiClass ) ) { |
| bc = BidiConstants.AN; |
| } else if ( "CS".equals ( bidiClass ) ) { |
| bc = BidiConstants.CS; |
| } else if ( "NSM".equals ( bidiClass ) ) { |
| bc = BidiConstants.NSM; |
| } else if ( "BN".equals ( bidiClass ) ) { |
| bc = BidiConstants.BN; |
| } else if ( "B".equals ( bidiClass ) ) { |
| bc = BidiConstants.B; |
| } else if ( "S".equals ( bidiClass ) ) { |
| bc = BidiConstants.S; |
| } else if ( "WS".equals ( bidiClass ) ) { |
| bc = BidiConstants.WS; |
| } else if ( "ON".equals ( bidiClass ) ) { |
| bc = BidiConstants.ON; |
| } else { |
| throw new IllegalArgumentException ( "unknown bidi class: " + bidiClass ); |
| } |
| return bc; |
| } |
| |
| private static void addInterval ( SortedSet intervals, int start, int end, int bidiClass ) { |
| intervals.add ( new Interval ( start, end, bidiClass ) ); |
| } |
| |
| private static void dumpData ( PrintWriter out ) { |
| boolean first; |
| StringBuffer sb = new StringBuffer(); |
| |
| // bcL1 |
| first = true; |
| sb.setLength(0); |
| out.println ( "private static byte[] bcL1 = {" ); |
| for ( int i = 0; i < bcL1.length; i++ ) { |
| if ( ! first ) { |
| sb.append ( "," ); |
| } else { |
| first = false; |
| } |
| sb.append ( bcL1[i] ); |
| if ( sb.length() > 120 ) { |
| sb.append(','); |
| out.println(sb); |
| first = true; |
| sb.setLength(0); |
| } |
| } |
| if ( sb.length() > 0 ) { |
| out.println(sb); |
| } |
| out.println ( "};" ); |
| out.println(); |
| |
| // bcR1 |
| first = true; |
| sb.setLength(0); |
| out.println ( "private static byte[] bcR1 = {" ); |
| for ( int i = 0; i < bcR1.length; i++ ) { |
| if ( ! first ) { |
| sb.append ( "," ); |
| } else { |
| first = false; |
| } |
| sb.append ( bcR1[i] ); |
| if ( sb.length() > 120 ) { |
| sb.append(','); |
| out.println(sb); |
| first = true; |
| sb.setLength(0); |
| } |
| } |
| if ( sb.length() > 0 ) { |
| out.println(sb); |
| } |
| out.println ( "};" ); |
| out.println(); |
| |
| // bcS1 |
| first = true; |
| sb.setLength(0); |
| out.println ( "private static int[] bcS1 = {" ); |
| for ( int i = 0; i < bcS1.length; i++ ) { |
| if ( ! first ) { |
| sb.append ( "," ); |
| } else { |
| first = false; |
| } |
| sb.append ( bcS1[i] ); |
| if ( sb.length() > 120 ) { |
| sb.append(','); |
| out.println(sb); |
| first = true; |
| sb.setLength(0); |
| } |
| } |
| if ( sb.length() > 0 ) { |
| out.println(sb); |
| } |
| out.println ( "};" ); |
| out.println(); |
| |
| // bcE1 |
| first = true; |
| sb.setLength(0); |
| out.println ( "private static int[] bcE1 = {" ); |
| for ( int i = 0; i < bcE1.length; i++ ) { |
| if ( ! first ) { |
| sb.append ( "," ); |
| } else { |
| first = false; |
| } |
| sb.append ( bcE1[i] ); |
| if ( sb.length() > 120 ) { |
| sb.append(','); |
| out.println(sb); |
| first = true; |
| sb.setLength(0); |
| } |
| } |
| if ( sb.length() > 0 ) { |
| out.println(sb); |
| } |
| out.println ( "};" ); |
| out.println(); |
| |
| // bcC1 |
| first = true; |
| sb.setLength(0); |
| out.println ( "private static byte[] bcC1 = {" ); |
| for ( int i = 0; i < bcC1.length; i++ ) { |
| if ( ! first ) { |
| sb.append ( "," ); |
| } else { |
| first = false; |
| } |
| sb.append ( bcC1[i] ); |
| if ( sb.length() > 120 ) { |
| sb.append(','); |
| out.println(sb); |
| first = true; |
| sb.setLength(0); |
| } |
| } |
| if ( sb.length() > 0 ) { |
| out.println(sb); |
| } |
| out.println ( "};" ); |
| out.println(); |
| } |
| |
| private static int getBidiClass ( int ch ) { |
| if ( ch <= 0x00FF ) { |
| return bcL1 [ ch - 0x0000 ]; |
| } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) { |
| return bcR1 [ ch - 0x0590 ]; |
| } else { |
| return getBidiClass ( ch, bcS1, bcE1, bcC1 ); |
| } |
| } |
| |
| private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) { |
| int k = Arrays.binarySearch ( sa, ch ); |
| if ( k >= 0 ) { |
| return ca [ k ]; |
| } else { |
| k = - ( k + 1 ); |
| if ( k == 0 ) { |
| return BidiConstants.L; |
| } else if ( ch <= ea [ k - 1 ] ) { |
| return ca [ k - 1 ]; |
| } else { |
| return BidiConstants.L; |
| } |
| } |
| } |
| |
| private static final int[] testData = // CSOK: ConstantName |
| { |
| 0x000000, BidiConstants.BN, |
| 0x000009, BidiConstants.S, |
| 0x00000A, BidiConstants.B, |
| 0x00000C, BidiConstants.WS, |
| 0x000020, BidiConstants.WS, |
| 0x000023, BidiConstants.ET, |
| 0x000028, BidiConstants.ON, |
| 0x00002B, BidiConstants.ES, |
| 0x00002C, BidiConstants.CS, |
| 0x000031, BidiConstants.EN, |
| 0x00003A, BidiConstants.CS, |
| 0x000041, BidiConstants.L, |
| 0x000300, BidiConstants.NSM, |
| 0x000374, BidiConstants.ON, |
| 0x0005BE, BidiConstants.R, |
| 0x000601, BidiConstants.AN, |
| 0x000608, BidiConstants.AL, |
| 0x000670, BidiConstants.NSM, |
| 0x000710, BidiConstants.AL, |
| 0x0007FA, BidiConstants.R, |
| 0x000970, BidiConstants.L, |
| 0x001392, BidiConstants.ON, |
| 0x002000, BidiConstants.WS, |
| 0x00200E, BidiConstants.L, |
| 0x00200F, BidiConstants.R, |
| 0x00202A, BidiConstants.LRE, |
| 0x00202B, BidiConstants.RLE, |
| 0x00202C, BidiConstants.PDF, |
| 0x00202D, BidiConstants.LRO, |
| 0x00202E, BidiConstants.RLO, |
| 0x0020E1, BidiConstants.NSM, |
| 0x002212, BidiConstants.ES, |
| 0x002070, BidiConstants.EN, |
| 0x003000, BidiConstants.WS, |
| 0x003009, BidiConstants.ON, |
| 0x00FBD4, BidiConstants.AL, |
| 0x00FE69, BidiConstants.ET, |
| 0x00FF0C, BidiConstants.CS, |
| 0x00FEFF, BidiConstants.BN, |
| 0x01034A, BidiConstants.L, |
| 0x010E60, BidiConstants.AN, |
| 0x01F100, BidiConstants.EN, |
| 0x0E0001, BidiConstants.BN, |
| 0x0E0100, BidiConstants.NSM, |
| 0x10FFFF, BidiConstants.BN |
| }; |
| |
| private static void test() throws Exception { |
| for ( int i = 0, n = testData.length / 2; i < n; i++ ) { |
| int ch = testData [ i * 2 + 0 ]; |
| int tc = testData [ i * 2 + 1 ]; |
| int bc = getBidiClass ( ch ); |
| if ( bc != tc ) { |
| throw new Exception ( "test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc ); |
| } |
| } |
| } |
| |
| /** |
| * Main entry point for generator. |
| * @param args array of command line arguments |
| */ |
| public static void main(String[] args) { |
| String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt"; |
| String outFileName = "BidiClassUtils.java"; |
| boolean ok = true; |
| for (int i = 0; i < args.length; i = i + 2) { |
| if (i + 1 == args.length) { |
| ok = false; |
| } else { |
| String opt = args[i]; |
| if ("-b".equals(opt)) { |
| bidiFileName = args [i + 1]; |
| } else if ("-o".equals(opt)) { |
| outFileName = args [i + 1]; |
| } else { |
| ok = false; |
| } |
| } |
| } |
| if (!ok) { |
| System.out.println("Usage: GenerateBidiClassUtils [-b <bidiFile>] [-o <outputFile>]"); |
| System.out.println(" defaults:"); |
| System.out.println(" <bidiFile>: " + bidiFileName); |
| System.out.println(" <outputFile>: " + outFileName); |
| } else { |
| try { |
| convertBidiClassProperties(bidiFileName, outFileName); |
| System.out.println("Generated " + outFileName + " from"); |
| System.out.println(" <bidiFile>: " + bidiFileName); |
| } catch (Exception e) { |
| System.out.println("An unexpected error occured"); |
| e.printStackTrace(); |
| } |
| } |
| } |
| |
| private static class Interval implements Comparable { |
| int start; // CSOK: VisibilityModifier |
| int end; // CSOK: VisibilityModifier |
| int bidiClass; // CSOK: VisibilityModifier |
| Interval ( int start, int end, int bidiClass ) { |
| this.start = start; |
| this.end = end; |
| this.bidiClass = bidiClass; |
| } |
| public int compareTo ( Object o ) { |
| Interval iv = (Interval) o; |
| if ( start < iv.start ) { |
| return -1; |
| } else if ( start > iv.start ) { |
| return 1; |
| } else if ( end < iv.end ) { |
| return -1; |
| } else if ( end > iv.end ) { |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| } |
| } |