blob: 9bac9cd7bc5c851d895572b8ea638a28e9654113 [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <sal/alloca.h>
#include <rtl/ustring.hxx>
#include <map>
#include <string>
/*****************************************************************************
* typedefs
*****************************************************************************/
// Maps a language identifier (the first column of an encoding table file,
// see read_encoding_table) to the text encoding selected for that language.
typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
// One entry of a static lookup table: a string key and the text encoding
// it stands for.
struct _pair {
const char *key;
rtl_TextEncoding value;
};
// Three-way comparison of a search key against the key of one table entry.
static int _pair_compare (const char *key, const _pair *pair);
// Binary search for key in the table base[0 .. member-1]; returns the
// matching entry or NULL.
static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
// Codepage strings (as they appear in the second column of an encoding
// table file) and the text encodings they select.
//
// This table is searched with _pair_search, which is a binary search, so
// the entries MUST stay sorted by key in the order defined by
// _pair_compare. That is a string order, not a numeric one, which is why
// "874" comes after "1258".
const _pair _ms_encoding_list[] = {
{ "0", RTL_TEXTENCODING_UTF8 },
{ "1250", RTL_TEXTENCODING_MS_1250 },
{ "1251", RTL_TEXTENCODING_MS_1251 },
{ "1252", RTL_TEXTENCODING_MS_1252 },
{ "1253", RTL_TEXTENCODING_MS_1253 },
{ "1254", RTL_TEXTENCODING_MS_1254 },
{ "1255", RTL_TEXTENCODING_MS_1255 },
{ "1256", RTL_TEXTENCODING_MS_1256 },
{ "1257", RTL_TEXTENCODING_MS_1257 },
{ "1258", RTL_TEXTENCODING_MS_1258 },
{ "874", RTL_TEXTENCODING_MS_874 },
{ "932", RTL_TEXTENCODING_MS_932 },
{ "936", RTL_TEXTENCODING_MS_936 },
{ "949", RTL_TEXTENCODING_MS_949 },
{ "950", RTL_TEXTENCODING_MS_950 }
};
/*****************************************************************************
* fgets replacement that works with Unix line endings on Windows
*****************************************************************************/
/*
 * Read one line from fp into s, storing at most n-1 characters.
 *
 * Reading stops after a '\n' has been stored (the newline is kept), when
 * getc() reports EOF, or once n-1 characters have been stored.
 *
 * Returns s, NUL-terminated, if at least one character was stored.
 * Returns NULL otherwise; s is not written to in that case.
 */
char * my_fgets(char *s, int n, FILE *fp)
{
    int count = 0;

    while ( count < n - 1 )
    {
        const int ch = getc(fp);
        if ( ch == EOF )
            break;

        const char stored = (char) ch;
        s[count++] = stored;
        if ( stored == '\n' )
            break;
    }

    /* nothing read: EOF right away, or no room for any character */
    if ( count <= 0 )
        return NULL;

    s[count] = '\0';
    return s;
}
/*****************************************************************************
* compare function for binary search
*****************************************************************************/
/*
 * Ordering used by _pair_search: compares the search key with the key of
 * one table entry via rtl_str_compareIgnoreAsciiCase and hands back that
 * result unchanged (negative, zero or positive).
 */
static int
_pair_compare (const char *key, const _pair *pair)
{
    return rtl_str_compareIgnoreAsciiCase( key, pair->key );
}
/*****************************************************************************
* binary search on encoding tables
*****************************************************************************/
/*
 * Binary search for key in the table base[0 .. member-1].
 *
 * The table has to be sorted in the order defined by _pair_compare.
 * Returns a pointer to the matching entry, or NULL if there is no match
 * or if key/base is NULL or the table is empty.
 */
static const _pair*
_pair_search (const char *key, const _pair *base, unsigned int member )
{
    /* reject unusable input up front */
    if ( key == NULL || base == NULL || member == 0 )
        return NULL;

    /* the candidates are always the half-open range [first, last) */
    unsigned int first = 0;
    unsigned int last  = member;

    while ( first < last )
    {
        const unsigned int middle = (first + last) / 2;
        const _pair *candidate = base + middle;
        const int order = _pair_compare( key, candidate );

        if ( order == 0 )
            return candidate;

        if ( order < 0 )
            last = middle;          /* continue in the lower half */
        else
            first = middle + 1;     /* continue in the upper half */
    }

    return NULL;
}
/************************************************************************
* read_encoding_table
************************************************************************/
/*
 * Read an encoding table file and add its entries to aEncodingMap.
 *
 * Each line is expected to hold a language identifier followed by
 * whitespace and a codepage string; anything after the codepage is
 * ignored. Lines starting with '#' are comments. Lines without a
 * codepage column (including blank lines) and lines whose codepage is
 * not listed in _ms_encoding_list are skipped.
 *
 * file          path of the encoding table file
 * aEncodingMap  receives one language -> encoding entry per usable line;
 *               an entry already present for a language is kept
 *
 * If the file cannot be opened, a message is written to stderr and the
 * process exits with status 2.
 */
void read_encoding_table(char * file, EncodingMap& aEncodingMap)
{
    FILE * fp = fopen(file, "r");
    if ( ! fp ) {
        fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
        exit(2);
    }

    const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair );

    char buffer[512];
    while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
        // strip comment lines
        if ( buffer[0] == '#' )
            continue;

        // Every scan below also stops at the terminating NUL, so a blank
        // line, a line without a codepage column or a last line without a
        // trailing newline can never be read or written past its end.
        // The unsigned char casts keep isspace() defined for bytes >= 0x80.

        // find end of language string
        char * cp = buffer;
        while ( *cp != '\0' && ! isspace((unsigned char) *cp) )
            ++cp;
        if ( *cp == '\0' )
            continue;           // no separator, hence no codepage column
        *cp++ = '\0';

        // find start of codepage string
        while ( isspace((unsigned char) *cp) )
            ++cp;
        if ( *cp == '\0' )
            continue;           // only whitespace after the language
        char * codepage = cp;

        // find end of codepage string
        while ( *cp != '\0' && ! isspace((unsigned char) *cp) )
            ++cp;
        *cp = '\0';

        // find the correct mapping for codepage
        const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
        if ( encoding != NULL ) {
            const std::string language(buffer);
            aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
        }
    }

    fclose(fp);
}
/************************************************************************
* print_legacy_mixed
************************************************************************/
/*
 * Write aString to ostream, converted to the encoding registered for
 * language in aEncodingMap.
 *
 * If the map has no entry for language, nothing is written to ostream
 * and a warning is printed to stderr instead.
 */
void print_legacy_mixed(
    FILE * ostream,
    const rtl::OUString& aString,
    const std::string& language,
    EncodingMap& aEncodingMap)
{
    const EncodingMap::iterator entry = aEncodingMap.find(language);

    if ( entry == aEncodingMap.end() ) {
        fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
        return;
    }

    fputs(OUStringToOString(aString, entry->second).getStr(), ostream);
}
/************************************************************************
* print_java_style
************************************************************************/
/*
 * Write aString to ostream, one UTF-16 code unit at a time: code units
 * below 128 are written as a single character, all others as a \uXXXX
 * escape (four lower-case hex digits, high byte first).
 */
void print_java_style(FILE * ostream, const rtl::OUString& aString)
{
    const int length = aString.getLength();
    int pos = 0;

    while ( pos < length ) {
        const sal_Unicode uc = aString[pos++];

        if ( uc >= 128 ) {
            // outside the 7-bit range: emit an escape sequence
            fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
            continue;
        }

        fprintf(ostream, "%c", (char) uc);
    }
}
/************************************************************************
* main
************************************************************************/
int main( int argc, char * const argv[] )
{
EncodingMap aEncodingMap;
FILE *istream = stdin;
FILE *ostream = stdout;
char *outfile = NULL;
int errflg = 0;
int argi;
for( argi=1; argi < argc; argi++ )
{
if( argv[argi][0] == '-' && argv[argi][2] == '\0' )
{
switch(argv[argi][1]) {
case 'o':
if (argi+1 >= argc || argv[argi+1][0] == '-')
{
fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
errflg++;
break;
}
++argi;
outfile = argv[argi];
break;
case 't':
if (argi+1 >= argc || argv[argi+1][0] == '-')
{
fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
errflg++;
break;
}
read_encoding_table(argv[++argi], aEncodingMap);
break;
default:
fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
errflg++;
}
}
else
{
break;
}
}
if (errflg) {
fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
exit(2);
}
/* assign input file to stdin */
if ( argi < argc )
{
istream = fopen(argv[argi], "r");
if ( istream == NULL ) {
fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
exit(2);
}
}
/* open output file if any */
if ( outfile )
{
ostream = fopen(outfile, "w");
if ( ostream == NULL ) {
fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
fclose(istream);
exit(2);
}
}
/* read line by line from stdin */
char buffer[65536];
while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
/* only handle lines containing " = " */
char * cp = strstr(buffer, " = \"");
if ( cp ) {
rtl::OUString aString;
/* find end of lang string */
int n;
for ( n=0; ! isspace(buffer[n]); n++ )
;
std::string line = buffer;
std::string lang(line, 0, n);
cp += 4;
rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp,
RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
fprintf(ostream, "%s = \"", lang.c_str());
if ( aEncodingMap.empty() ) {
print_java_style(ostream, aString);
} else {
print_legacy_mixed(ostream, aString, lang, aEncodingMap);
}
fprintf(ostream, "\"\n");
} else {
fputs(buffer, ostream);
}
}
fclose(ostream);
fclose(istream);
}