| /************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *************************************************************/ |
| |
| |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <ctype.h> |
| #include <sal/alloca.h> |
| |
| #include <rtl/ustring.hxx> |
| |
| #include <map> |
| #include <string> |
| |
| /***************************************************************************** |
| * typedefs |
| *****************************************************************************/ |
| |
| typedef std::map< const std::string, rtl_TextEncoding > EncodingMap; |
| |
| struct _pair { |
| const char *key; |
| rtl_TextEncoding value; |
| }; |
| |
| static int _pair_compare (const char *key, const _pair *pair); |
| static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member ); |
| |
| |
| const _pair _ms_encoding_list[] = { |
| { "0", RTL_TEXTENCODING_UTF8 }, |
| { "1250", RTL_TEXTENCODING_MS_1250 }, |
| { "1251", RTL_TEXTENCODING_MS_1251 }, |
| { "1252", RTL_TEXTENCODING_MS_1252 }, |
| { "1253", RTL_TEXTENCODING_MS_1253 }, |
| { "1254", RTL_TEXTENCODING_MS_1254 }, |
| { "1255", RTL_TEXTENCODING_MS_1255 }, |
| { "1256", RTL_TEXTENCODING_MS_1256 }, |
| { "1257", RTL_TEXTENCODING_MS_1257 }, |
| { "1258", RTL_TEXTENCODING_MS_1258 }, |
| { "874", RTL_TEXTENCODING_MS_874 }, |
| { "932", RTL_TEXTENCODING_MS_932 }, |
| { "936", RTL_TEXTENCODING_MS_936 }, |
| { "949", RTL_TEXTENCODING_MS_949 }, |
| { "950", RTL_TEXTENCODING_MS_950 } |
| }; |
| |
| |
| /***************************************************************************** |
| * fgets that works with Unix line ends on Windows |
| *****************************************************************************/ |
| |
/*
 * Read one line from fp into s, character by character via getc().
 *
 * At most n-1 characters are stored; reading stops after a '\n' (which is
 * kept in the buffer) or at EOF. The result is NUL-terminated.
 *
 * Returns s when at least one character was stored, NULL otherwise
 * (immediate EOF, or n too small to hold any character).
 */
char * my_fgets(char *s, int n, FILE *fp)
{
    int len = 0;

    while( len < n-1 )
    {
        const int ch = getc(fp);
        if( ch == EOF )
            break;

        s[len++] = (char) ch;

        /* the newline is part of the returned line */
        if( ch == '\n' )
            break;
    }

    if( len <= 0 )
        return NULL;

    s[len] = '\0';
    return s;
}
| |
| /***************************************************************************** |
| * compare function for binary search |
| *****************************************************************************/ |
| |
| static int |
| _pair_compare (const char *key, const _pair *pair) |
| { |
| int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); |
| return result; |
| } |
| |
| /***************************************************************************** |
| * binary search on encoding tables |
| *****************************************************************************/ |
| |
| static const _pair* |
| _pair_search (const char *key, const _pair *base, unsigned int member ) |
| { |
| unsigned int lower = 0; |
| unsigned int upper = member; |
| unsigned int current; |
| int comparison; |
| |
| /* check for validity of input */ |
| if ( (key == NULL) || (base == NULL) || (member == 0) ) |
| return NULL; |
| |
| /* binary search */ |
| while ( lower < upper ) |
| { |
| current = (lower + upper) / 2; |
| comparison = _pair_compare( key, base + current ); |
| if (comparison < 0) |
| upper = current; |
| else |
| if (comparison > 0) |
| lower = current + 1; |
| else |
| return base + current; |
| } |
| |
| return NULL; |
| } |
| |
| |
| /************************************************************************ |
| * read_encoding_table |
| ************************************************************************/ |
| |
| void read_encoding_table(char * file, EncodingMap& aEncodingMap) |
| { |
| FILE * fp = fopen(file, "r"); |
| if ( ! fp ) { |
| fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno)); |
| exit(2); |
| } |
| |
| char buffer[512]; |
| while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) { |
| |
| // strip comment lines |
| if ( buffer[0] == '#' ) |
| continue; |
| |
| // find end of language string |
| char * cp; |
| for ( cp = buffer; ! isspace(*cp); cp++ ) |
| ; |
| *cp = '\0'; |
| |
| // find start of codepage string |
| for ( ++cp; isspace(*cp); ++cp ) |
| ; |
| char * codepage = cp; |
| |
| // find end of codepage string |
| for ( ++cp; ! isspace(*cp); ++cp ) |
| ; |
| *cp = '\0'; |
| |
| // find the correct mapping for codepage |
| const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair ); |
| const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members ); |
| |
| if ( encoding != NULL ) { |
| const std::string language(buffer); |
| aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) ); |
| } |
| } |
| |
| fclose(fp); |
| } |
| |
| /************************************************************************ |
| * print_legacy_mixed |
| ************************************************************************/ |
| |
| void print_legacy_mixed( |
| FILE * ostream, |
| const rtl::OUString& aString, |
| const std::string& language, |
| EncodingMap& aEncodingMap) |
| { |
| EncodingMap::iterator iter = aEncodingMap.find(language); |
| |
| if ( iter != aEncodingMap.end() ) { |
| fputs(OUStringToOString(aString, iter->second).getStr(), ostream); |
| } else { |
| fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str()); |
| } |
| } |
| |
| /************************************************************************ |
| * print_java_style |
| ************************************************************************/ |
| |
| void print_java_style(FILE * ostream, const rtl::OUString& aString) |
| { |
| int imax = aString.getLength(); |
| for (int i = 0; i < imax; i++) { |
| sal_Unicode uc = aString[i]; |
| if ( uc < 128 ) { |
| fprintf(ostream, "%c", (char) uc); |
| } else { |
| fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF ); |
| } |
| } |
| } |
| |
| /************************************************************************ |
| * main |
| ************************************************************************/ |
| |
| int main( int argc, char * const argv[] ) |
| { |
| EncodingMap aEncodingMap; |
| |
| FILE *istream = stdin; |
| FILE *ostream = stdout; |
| |
| char *outfile = NULL; |
| |
| int errflg = 0; |
| int argi; |
| |
| for( argi=1; argi < argc; argi++ ) |
| { |
| if( argv[argi][0] == '-' && argv[argi][2] == '\0' ) |
| { |
| switch(argv[argi][1]) { |
| case 'o': |
| if (argi+1 >= argc || argv[argi+1][0] == '-') |
| { |
| fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); |
| errflg++; |
| break; |
| } |
| |
| ++argi; |
| outfile = argv[argi]; |
| break; |
| case 't': |
| if (argi+1 >= argc || argv[argi+1][0] == '-') |
| { |
| fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); |
| errflg++; |
| break; |
| } |
| |
| read_encoding_table(argv[++argi], aEncodingMap); |
| break; |
| default: |
| fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]); |
| errflg++; |
| } |
| } |
| else |
| { |
| break; |
| } |
| } |
| |
| if (errflg) { |
| fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n"); |
| exit(2); |
| } |
| |
| /* assign input file to stdin */ |
| if ( argi < argc ) |
| { |
| istream = fopen(argv[argi], "r"); |
| if ( istream == NULL ) { |
| fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno)); |
| exit(2); |
| } |
| } |
| |
| /* open output file if any */ |
| if ( outfile ) |
| { |
| ostream = fopen(outfile, "w"); |
| if ( ostream == NULL ) { |
| fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno)); |
| fclose(istream); |
| exit(2); |
| } |
| } |
| |
| /* read line by line from stdin */ |
| char buffer[65536]; |
| while ( NULL != fgets(buffer, sizeof(buffer), istream) ) { |
| |
| /* only handle lines containing " = " */ |
| char * cp = strstr(buffer, " = \""); |
| if ( cp ) { |
| rtl::OUString aString; |
| |
| /* find end of lang string */ |
| int n; |
| for ( n=0; ! isspace(buffer[n]); n++ ) |
| ; |
| |
| std::string line = buffer; |
| std::string lang(line, 0, n); |
| |
| cp += 4; |
| rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp, |
| RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS ); |
| |
| fprintf(ostream, "%s = \"", lang.c_str()); |
| |
| if ( aEncodingMap.empty() ) { |
| print_java_style(ostream, aString); |
| } else { |
| print_legacy_mixed(ostream, aString, lang, aEncodingMap); |
| } |
| |
| fprintf(ostream, "\"\n"); |
| |
| |
| } else { |
| fputs(buffer, ostream); |
| } |
| } |
| |
| fclose(ostream); |
| fclose(istream); |
| } |