| #!/usr/bin/perl |
| #************************************************************** |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #************************************************************** |
| |
| |
| |
| # The following files must be available in a ./input subdir: |
| |
| # <http://www.unicode.org/Public/UNIDATA/Unihan.txt>: |
| # "Unicode version: 3.1.1 Table version: 1.1 Date: 28 June 2001" |
| # contains descriptions for: |
| # U+3400..4DFF CJK Unified Ideographs Extension A |
| # U+4E00..9FFF CJK Unified Ideographs |
| # U+F900..FAFF CJK Compatibility Ideographs |
| # U+20000..2F7FF CJK Unified Ideographs Extension B |
| # U+2F800..2FFFF CJK Compatibility Ideographs Supplement |
| |
| # <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>: |
| # "Unicode version: 1.1 Table version: 0.0d1 Date: 21 October 1994" |
| # contains mappings for CNS 11643-1986 |
| |
| # <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>: |
| # "Unicode version: 1.1 Table version: 0.49 Date: 26 March 1998" |
| # contains mappings for CNS 11643-1992 that are incompatible with |
| # CNS11643.TXT |
| |
# Base name of the converter. Lower-cased it names the generated .tab file
# and the .pl file whose header comment is copied into it; verbatim it is
# embedded in the identifiers of the generated C tables.
$id = 'Cns116431992';
| |
# Decide whether a number is an acceptable UTF-32 code point.
#
# Argument: the code point as a number.
# Returns:  true for values in 0..0x10FFFF that are neither surrogates
#           (0xD800..0xDFFF), nor in 0xFDD0..0xFDEF, nor one of the last
#           two values (..FFFE, ..FFFF) of a 64K plane; false otherwise.
sub isValidUtf32
{
    my ($code) = @_;

    return !1 if $code < 0 || $code > 0x10FFFF;
    return !1 if $code >= 0xD800 && $code <= 0xDFFF;
    return !1 if $code >= 0xFDD0 && $code <= 0xFDEF;
    return ($code & 0xFFFF) < 0xFFFE;
}
| |
# Render a code point in the usual "U+XXXX" notation.
#
# Argument: the code point as a number.
# Returns:  "U+" followed by at least four upper-case hexadecimal digits.
sub printUtf32
{
    my $code = shift;
    my $digits = sprintf('%04X', $code);
    return 'U+' . $digits;
}
| |
# Decide whether a plane/row/column triple lies inside the code space this
# script accepts.
#
# Arguments: plane, row, column (numbers).
# Returns:   true when plane is in 1..16 and both row and column are in
#            1..94; false otherwise.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;

    return !1 if $plane < 1 || $plane > 16;
    return !1 if $row < 1 || $row > 94;
    return $column >= 1 && $column <= 94;
}
| |
# Render a CNS code point as "P-RR/CC".
#
# Arguments: plane, row, column (numbers).
# Returns:   the plane in decimal, a dash, then row and column as
#            two-digit decimals separated by a slash.
sub printCns116431992
{
    my ($plane, $row, $column) = @_[0 .. 2];
    my $cell = sprintf('%02d/%02d', $row, $column);
    return sprintf('%d', $plane) . '-' . $cell;
}
| |
# Format a fill-level figure for the comments of the generated tables.
#
# Arguments: bytes actually used, bytes reserved.
# Returns:   "USED/SPACE bytes (P%)", where P is USED * 100 / SPACE with
#            one decimal place.
#
# When nothing was reserved (SPACE is zero or undefined) the percentage is
# reported as 0.0 instead of dividing by zero. The grand totals at the end
# of the script are zero when no mapping was read at all.
sub printStats
{
    my ($used, $space) = @_;

    my $percent = 0;
    if (defined($space) && $space != 0)
    {
        $percent = $used * 100 / $space;
    }
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}
| |
# Build the blank padding that stands in for the table cells skipped at
# the start of an output line, so that the first cell actually printed
# lands in its usual column.
#
# Arguments: width of one cell in characters, number of cells per output
#            line, index of the first cell that will be printed.
# Returns:   a string of spaces covering the cells between the preceding
#            multiple of the cells-per-line count and that index.
#
# Only lexical variables are used, so same-named package variables of the
# calling code ($i, $j, $output) are left alone.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped = $end - $line_start;
    return "" if $skipped <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped);
}
| |
# Per input file: how many CNS code points received their first mapping
# from that file.
$count_Unihan_txt = $count_CNS11643_TXT = $count_Uni2CNS = 0;
| |
# Pass 1: fill the CNS -> Unicode table (@cns_map) from input/Unihan.txt.
#
# Two tags are read, both with a value of the form P-RRCC (hexadecimal
# plane, row byte, column byte):
#   kCNS1992     - authoritative; a second, different Unicode value for an
#                  already mapped CNS code point aborts the run.
#   kIRG_TSource - fills gaps only; a disagreement is reported on stdout
#                  and the mapping recorded first is kept.
# A kCNS1992 line whose value does not have that form aborts the run.
if (1)
{
    $filename = "Unihan.txt";
    open(my $unihan, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (my $line = <$unihan>)
    {
        if ($line =~ /^U\+([0-9A-F]+)\t(kCNS1992|kIRG_TSource)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my ($code_hex, $tag, $plane_hex, $row_hex, $column_hex)
                = ($1, $2, $3, $4, $5);
            $utf32 = oct("0x" . $code_hex);
            $cns_plane = oct("0x" . $plane_hex);
            # Row and column arrive as bytes 0x21..0x7E; rebase to 1..94.
            $cns_row = oct("0x" . $row_hex) - 0x20;
            $cns_column = oct("0x" . $column_hex) - 0x20;

            # printUtf32 already supplies the "U+" prefix.
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            isValidCns116431992($cns_plane, $cns_row, $cns_column)
                or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

            my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
            if (!defined($known))
            {
                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
                $cns_plane_used[$cns_plane] = 1;
                ++$count_Unihan_txt;
            }
            elsif ($known != $utf32)
            {
                my $clash = "Mapping "
                            . printCns116431992($cns_plane,
                                                $cns_row,
                                                $cns_column)
                            . " to "
                            . printUtf32($known)
                            . ", NOT "
                            . printUtf32($utf32);
                die $clash if $tag eq "kCNS1992";
                print "WARNING! ", $clash, "\n";
            }
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            die "Bad format";
        }
    }
    close($unihan);
}
| |
# Pass 2: add mappings from input/CNS11643.TXT.
#
# Each data line carries 0xPRRCC (plane digit, row byte, column byte), a
# tab, 0x<Unicode>, a tab and a '#' comment. Every matching line is
# validated, but only planes 1 and 2 are taken from this table. A mapping
# that contradicts one already recorded aborts the run.
if (1)
{
    $filename = "CNS11643.TXT";
    open(my $table, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (my $line = <$table>)
    {
        next unless $line =~ /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/;
        my ($plane_hex, $row_hex, $column_hex, $code_hex) = ($1, $2, $3, $4);

        $utf32 = oct("0x" . $code_hex);
        $cns_plane = oct("0x" . $plane_hex);
        # Row and column arrive as bytes 0x21..0x7E; rebase to 1..94.
        $cns_row = oct("0x" . $row_hex) - 0x20;
        $cns_column = oct("0x" . $column_hex) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                   . printCns116431992($cns_plane, $cns_row, $cns_column);

        next if $cns_plane > 2;

        my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
        if (!defined($known))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_CNS11643_TXT;
        }
        elsif ($known != $utf32)
        {
            die "Mapping "
                . printCns116431992($cns_plane, $cns_row, $cns_column)
                . " to "
                . printUtf32($known)
                . ", NOT "
                . printUtf32($utf32);
        }
    }
    close($table);
}
| |
# Pass 3 (switched off by the constant-false condition): add mappings from
# input/Uni2CNS.
#
# Each data line carries <Unicode hex>, a tab, P-RRCC, a tab and trailing
# text. Only CNS code points that are still unmapped are filled in; every
# plane 1 code point seen is echoed to stdout.
if (0)
{
    $filename = "Uni2CNS";
    open(my $uni2cns, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (my $line = <$uni2cns>)
    {
        next unless $line =~ /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/;
        my ($code_hex, $plane_hex, $row_hex, $column_hex) = ($1, $2, $3, $4);

        $utf32 = oct("0x" . $code_hex);
        $cns_plane = oct("0x" . $plane_hex);
        # Row and column arrive as bytes 0x21..0x7E; rebase to 1..94.
        $cns_row = oct("0x" . $row_hex) - 0x20;
        $cns_column = oct("0x" . $column_hex) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                   . printCns116431992($cns_plane, $cns_row, $cns_column);

        if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_Uni2CNS;
        }
        # A mapping that differs from the recorded one is NOT treated as an
        # error here (the "Mapping ... to ..., NOT ..." check used for the
        # other inputs is intentionally left out): the recorded mapping
        # simply stays in place.

        if ($cns_plane == 1)
        {
            print printCns116431992($cns_plane, $cns_row, $cns_column),
                  "\n";
        }
    }
    close($uni2cns);
}
| |
# Invert the table: visit every mapped CNS code point in plane, row, column
# order and record it in @uni_map as the target of its Unicode value.
#
# Both sides are addressed by three small numbers. Unicode is split into
# plane (bits 16 and up), page (bits 8-15) and index (bits 0-7); a CNS
# target is stored packed as (plane << 16) | (row << 8) | column.
#
# When a Unicode value already has a target, the earlier target stays,
# except that a target inside CNS plane 3 from row 66 column 39 onwards is
# replaced by a newcomer outside that area. Every such collision is
# reported on stdout.

# True for code points in plane 3, row 66 column 39 and beyond.
my $is_fictitious = sub
{
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row > 66 || ($row == 66 && $column > 38));
};

for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
{
    next unless defined($cns_plane_used[$cns_plane]);

    for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
    {
        for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
        {
            next unless defined($cns_map[$cns_plane][$cns_row][$cns_column]);

            $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            $uni_plane = $utf32 >> 16;
            $uni_page = ($utf32 >> 8) & 0xFF;
            $uni_index = $utf32 & 0xFF;

            my $packed = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;

            unless (defined($uni_plane_used[$uni_plane])
                    && defined($uni_page_used[$uni_plane][$uni_page])
                    && defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                # First target for this Unicode value.
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # Collision: unpack the target recorded earlier.
            $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            $cns1_plane = $cns1 >> 16;
            $cns1_row = ($cns1 >> 8) & 0xFF;
            $cns1_column = $cns1 & 0xFF;

            my $unicode = printUtf32($utf32);
            my $newcomer
                = printCns116431992($cns_plane, $cns_row, $cns_column);
            my $recorded
                = printCns116431992($cns1_plane, $cns1_row, $cns1_column);

            # Do not map from Unicode to Fictitious Character Set
            # Extensions (Lunde, p. 131), if possible:
            if ($is_fictitious->($cns_plane, $cns_row, $cns_column))
            {
                print " (" . $unicode . " to fictious " . $newcomer
                      . " ignored, favouring " . $recorded . ")\n";
            }
            elsif ($is_fictitious->($cns1_plane, $cns1_row, $cns1_column))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed;
                print " (" . $unicode . " to fictious " . $recorded
                      . " ignored, favouring " . $newcomer . ")\n";
            }
            else
            {
                print "WARNING! Mapping " . $unicode . " to " . $recorded
                      . ", NOT " . $newcomer . "\n";
            }
        }
    }
}
# Sanity check: no code point in U+0000..U+007F may have received a CNS
# target; the script aborts, naming the offending pair, if one has.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $ascii (0 .. 0x7F)
    {
        # Look at the slot of the code point under examination; indexing
        # with any other variable would test an unrelated slot.
        my $target = $uni_map[0][0][$ascii];
        next unless defined($target);

        die "Mapping "
            . printUtf32($ascii)
            . " to "
            . printCns116431992($target >> 16,
                                ($target >> 8) & 0xFF,
                                $target & 0xFF);
    }
}
| |
# Create the output file <lower-cased id>.tab; the handle OUT stays open
# for all the table writers that follow.
$filename = lc($id) . ".tab";
open(OUT, '>', $filename)
    or die "Cannot write " . $filename . ": " . $!;

# Copy the leading '#' comment block of <lower-cased id>.pl into the output
# as a C comment:
#   - a "#!" line is dropped;
#   - the first "#*..." line opens the comment ("/*...");
#   - every later "#*..." line closes it (its last character becomes "/");
#   - any other "#" line becomes " *" plus its text, minus one blank
#     directly after the "#" if there is one.
# Copying stops at the first line that does not start with "#".
{
    $filename = lc($id) . ".pl";
    open(my $script, '<', $filename)
        or die "Cannot read " . $filename . ": " . $!;
    my $comment_unopened = 1;
    while (my $line = <$script>)
    {
        next if $line =~ /^\#!/;

        if ($line =~ /^\#(\*.*)$/)
        {
            my $stars = $1;
            if ($comment_unopened)
            {
                print OUT "/", $stars, "\n";
                $comment_unopened = 0;
            }
            else
            {
                print OUT " ", substr($stars, 0, length($stars) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# ?(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close($script);
}
| |
# Emit the guarded include of sal/types.h, framed by blank lines.
print OUT join("\n",
               "",
               "#ifndef _SAL_TYPES_H_",
               "#include \"sal/types.h\"",
               "#endif",
               "",
               "");
| |
# Emit the CNS -> Unicode data array of 16-bit entries.
#
# For every row of a used plane that holds at least one mapping:
#   1. one entry "first | (last << 8)" giving the first and last mapped
#      column;
#   2. one entry per column first..last: the code point itself when it is
#      at most 0xFFFF, its high surrogate otherwise, 0xffff when the
#      column is unmapped;
#   3. only if the row holds code points above 0xFFFF: one entry with the
#      first such column, then one entry per column from there to the last
#      such column, holding the low surrogate or 0.
# Cells are broken into lines after every column whose number ends in 9.
#
# Recorded for the writers of the offset tables:
#   $cns_data_offsets[plane][row]  entry index where the row starts, or -1
#   $cns_data_space[plane][row]    bytes occupied by the row
#   $cns_data_used[plane][row]     bytes of the row that carry information
# A one-line summary per used plane goes to stdout.
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
$cns_data_index = 0;

# True right after a line break, i.e. before the next cell needs its
# leading blank.
my $at_line_start = 0;

# Write one table cell for the given column, count it, and break the line
# after columns 9, 19, 29, ...
my $put_cns_cell = sub
{
    my ($column, $text) = @_;
    if ($at_line_start)
    {
        print OUT " ";
        $at_line_start = 0;
    }
    print OUT $text;
    ++$cns_data_index;
    if ($column % 10 == 9)
    {
        print OUT "\n";
        $at_line_start = 1;
    }
};

for my $plane (1 .. 16)
{
    next unless defined($cns_plane_used[$plane]);

    my $row_count = 0;
    my $char_count = 0;
    for my $row (1 .. 94)
    {
        my @mapped = grep { defined($cns_map[$plane][$row][$_]) } 1 .. 94;
        unless (@mapped)
        {
            print OUT " /* plane ", $plane, ", row ", $row, ": --- */\n";
            $cns_data_offsets[$plane][$row] = -1;
            next;
        }
        my ($first, $last) = @mapped[0, -1];

        my $row_start = $cns_data_index;
        $cns_data_offsets[$plane][$row] = $row_start;
        ++$row_count;
        print OUT " /* plane ", $plane, ", row ", $row, " */\n";

        print OUT " ", $first, " | (", $last,
                  " << 8), /* first, last */\n";
        ++$cns_data_index;

        # Columns whose code point needs a surrogate pair, in order.
        my @paired = ();

        print OUT " ", printSpaces(7, 10, $first);
        $at_line_start = 0;
        for my $column ($first .. $last)
        {
            my $code = $cns_map[$plane][$row][$column];
            my $text;
            if (!defined($code))
            {
                $text = "0xffff,";
            }
            elsif ($code <= 0xFFFF)
            {
                $text = sprintf("0x%04X,", $code);
            }
            else
            {
                push @paired, $column;
                $text = sprintf("0x%04X,",
                                (0xD800 | (($code - 0x10000) >> 10)));
            }
            $put_cns_cell->($column, $text);
        }
        print OUT "\n" unless $at_line_start;

        if (@paired)
        {
            my ($low_first, $low_last) = @paired[0, -1];

            print OUT " ", $low_first, ", /* first low-surrogate */\n";
            ++$cns_data_index;

            print OUT " ", printSpaces(7, 10, $low_first);
            $at_line_start = 0;
            for my $column ($low_first .. $low_last)
            {
                my $code = $cns_map[$plane][$row][$column];
                $code = 0 unless defined($code);
                my $text = $code <= 0xFFFF
                    ? " 0,"
                    : sprintf("0x%04X,",
                              (0xDC00 | (($code - 0x10000) & 0x3FF)));
                $put_cns_cell->($column, $text);
            }
            print OUT "\n" unless $at_line_start;
        }

        $char_count += scalar(@mapped);
        $cns_data_space[$plane][$row] = ($cns_data_index - $row_start) * 2;
        $cns_data_used[$plane][$row]
            = (1 + scalar(@mapped)
               + (@paired ? 1 + scalar(@paired) : 0)) * 2;
    }

    print "cns plane ", $plane,
          ": rows = ", $row_count,
          ", chars = ", $char_count,
          "\n";
}
print OUT "};\n\n";
| |
# Emit the row offset table: for every used CNS plane, 94 entries giving
# the index in the data array where each row starts, or -1 for a row
# without mappings. Unused planes contribute only a comment line.
# $cns_rowoffsets_used[plane] collects 4 bytes for every non-empty row.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
for my $plane (1 .. 16)
{
    unless (defined($cns_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    $cns_rowoffsets_used[$plane] = 0;
    for my $row (1 .. 94)
    {
        my $row_start = $cns_data_offsets[$plane][$row];
        if ($row_start == -1)
        {
            print OUT " -1, /* plane ", $plane, ", row ", $row, " */\n";
            next;
        }

        my $fill = printStats($cns_data_used[$plane][$row],
                              $cns_data_space[$plane][$row]);
        print OUT " ", $row_start,
                  ", /* plane ", $plane,
                  ", row ", $row,
                  "; ", $fill,
                  " */\n";
        $cns_rowoffsets_used[$plane] += 4;
    }
}
print OUT "};\n\n";
| |
# Emit the plane offset table: one entry per CNS plane 1..16. Used planes
# are numbered consecutively from 0 and written as "<n> * 94"; unused
# planes get -1.
print OUT "static sal_Int32 const aImpl",
          $id,
          "ToUnicodePlaneOffsets[] = {\n";
my $cns_table_slot = 0;
for my $plane (1 .. 16)
{
    unless (defined($cns_plane_used[$plane]))
    {
        print OUT " -1, /* plane ", $plane, " */\n";
        next;
    }

    my $fill = printStats($cns_rowoffsets_used[$plane], 94 * 4);
    print OUT " ", $cns_table_slot,
              " * 94, /* plane ", $plane,
              "; ", $fill,
              " */\n";
    ++$cns_table_slot;
}
print OUT "};\n\n";
| |
# Emit the Unicode -> CNS data array of bytes.
#
# For every used page of a used Unicode plane: two bytes (first and last
# mapped index within the page), then three bytes per index first..last
# holding CNS plane, row and column, or 0, 0, 0 for an unmapped index.
# Cells are broken into lines after every index that is 7 modulo 8.
#
# Recorded for the writers of the offset tables:
#   $uni_data_offsets[plane][page]  byte index where the page starts, or -1
#   $uni_data_space[plane][page]    bytes occupied by the page
#   $uni_data_used[plane][page]     bytes of the page that carry information
print OUT "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    for my $page (0 .. 255)
    {
        unless (defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page, ": --- */\n";
            next;
        }

        my $page_start = $uni_data_index;
        $uni_data_offsets[$plane][$page] = $page_start;
        print OUT " /* plane ", $plane, ", page ", $page, " */\n";

        # Extent of the mapped indexes within this page.
        $uni_page_first = -1;
        for my $index (0 .. 255)
        {
            next unless defined($uni_map[$plane][$page][$index]);
            $uni_page_first = $index if $uni_page_first == -1;
            $uni_page_last = $index;
        }

        print OUT " ", $uni_page_first, ", ", $uni_page_last,
                  ", /* first, last */\n";
        $uni_data_index += 2;
        my $used_bytes = 2;

        print OUT " ", printSpaces(9, 8, $uni_page_first);
        my $line_open = 1;      # false right after a line break
        for my $index ($uni_page_first .. $uni_page_last)
        {
            print OUT " " unless $line_open;
            $line_open = 1;

            my $target = $uni_map[$plane][$page][$index];
            if (defined($target))
            {
                printf OUT "%2d,%2d,%2d,",
                           $target >> 16,
                           ($target >> 8) & 0xFF,
                           $target & 0xFF;
                $used_bytes += 3;
            }
            else
            {
                print OUT " 0, 0, 0,";
            }
            $uni_data_index += 3;

            next unless $index % 8 == 7;
            print OUT "\n";
            $line_open = 0;
        }
        print OUT "\n" if $line_open;

        $uni_data_used[$plane][$page] = $used_bytes;
        $uni_data_space[$plane][$page] = $uni_data_index - $page_start;
    }
}
print OUT "};\n\n";
| |
# Emit the page offset table: for every used Unicode plane, 256 entries
# giving the index in the data array where each page starts, or -1 for an
# unused page. Unused planes contribute only a comment line.
#
# Per used plane the following totals are stored for the plane table:
#   $uni_pageoffsets_used[plane]  4 bytes for every used page
#   $uni_data_used_sum[plane]     sum of the pages' used bytes
#   $uni_data_space_sum[plane]    sum of the pages' occupied bytes
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
for my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255)
    {
        my $page_start = $uni_data_offsets[$plane][$page];
        if ($page_start == -1)
        {
            print OUT " -1, /* plane ", $plane, ", page ", $page, " */\n";
            next;
        }

        my $used_bytes = $uni_data_used[$plane][$page];
        my $space_bytes = $uni_data_space[$plane][$page];
        print OUT " ", $page_start,
                  ", /* plane ", $plane,
                  ", page ", $page,
                  "; ", printStats($used_bytes, $space_bytes),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $used_bytes;
        $uni_data_space_sum[$plane] += $space_bytes;
    }
}
print OUT "};\n\n";
| |
# Emit the plane offset table: one entry per Unicode plane 0..16. Used
# planes are numbered consecutively from 0 and written as "<n> * 256";
# unused planes get -1. A closing comment gives the overall fill levels of
# this table, of the page offset tables and of the data array.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
my $uni_table_slot = 0;
my $plane_table_bytes = 0;
my ($page_used_total, $page_space_total) = (0, 0);
my ($data_used_total, $data_space_total) = (0, 0);
for my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT " -1, /* plane ", $plane, " */\n";
        next;
    }

    my $page_fill = printStats($uni_pageoffsets_used[$plane], 256 * 4);
    my $data_fill = printStats($uni_data_used_sum[$plane],
                               $uni_data_space_sum[$plane]);
    print OUT " ", $uni_table_slot,
              " * 256, /* plane ", $plane,
              "; ", $page_fill,
              ", ", $data_fill,
              " */\n";
    ++$uni_table_slot;

    $plane_table_bytes += 4;
    $page_used_total += $uni_pageoffsets_used[$plane];
    $page_space_total += 256 * 4;
    $data_used_total += $uni_data_used_sum[$plane];
    $data_space_total += $uni_data_space_sum[$plane];
}
print OUT " /* ",
          printStats($plane_table_bytes, 17 * 4),
          ", ",
          printStats($page_used_total, $page_space_total),
          ", ",
          printStats($data_used_total, $data_space_total),
          " */\n};\n";
| |
# Finish the output file. Buffered data is written at this point, so a
# failing close means the generated table is incomplete. $filename no
# longer holds the .tab name here, hence the name is rebuilt from $id.
close(OUT)
    or die "Cannot write " . lc($id) . ".tab: " . $!;

# Report how many CNS code points each input file contributed.
print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
      ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";