blob: e865f5890ec64c755d35e99926a162638d162c8d [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_l10ntools.hxx"
#include <stdio.h>
#include <tools/fsys.hxx>
#include <tools/stream.hxx>
// local includes
#include "utf8conv.hxx"
// File type codes produced by GetGSIFileType() and consumed by the
// GetGSILine*() / ConvertGSILine() helpers in this file.
// No non-empty line could be read from the stream.
#define GSI_FILE_UNKNOWN 0x0000
// The first non-empty line contains the "($$)" separator.
#define GSI_FILE_OLDSTYLE 0x0001
// The first non-empty line does not contain "($$)"; fields are tab separated.
#define GSI_FILE_L10NFRAMEWORK 0x0002
/*****************************************************************************/
// Determines the GSI flavour of rStream by inspecting its first non-empty
// line. The stream is rewound for the inspection and afterwards positioned
// back to where it was on entry.
// Returns GSI_FILE_UNKNOWN when no non-empty line exists, GSI_FILE_OLDSTYLE
// when that line contains "($$)", GSI_FILE_L10NFRAMEWORK otherwise.
sal_uInt16 GetGSIFileType( SvStream &rStream )
/*****************************************************************************/
{
    const sal_uLong nSavedPos = rStream.Tell();
    rStream.Seek( STREAM_SEEK_TO_BEGIN );

    // skip leading empty lines; stop at the first line with content or at EOF
    ByteString sFirstLine;
    do {
        if ( rStream.IsEof())
            break;
        rStream.ReadLine( sFirstLine );
    } while ( !sFirstLine.Len());

    sal_uInt16 nDetected = GSI_FILE_UNKNOWN;
    if ( sFirstLine.Len()) {
        if ( sFirstLine.Search( "($$)" ) == STRING_NOTFOUND )
            nDetected = GSI_FILE_L10NFRAMEWORK;
        else
            nDetected = GSI_FILE_OLDSTYLE;
    }

    // leave the stream where the caller had it
    rStream.Seek( nSavedPos );
    return nDetected;
}
/*****************************************************************************/
// Extracts the identifier part of one GSI line.
// Old style: the text before the first "($$)" separator (or tab).
// L10N framework: tab tokens 0, 1, 4 and 5 joined by tabs.
// Any other file type yields an empty string.
ByteString GetGSILineId( const ByteString &rLine, sal_uInt16 nFileType )
/*****************************************************************************/
{
    if ( nFileType == GSI_FILE_OLDSTYLE ) {
        // normalise the "($$)" separator to a tab, then take the first field
        ByteString sTabbed( rLine );
        sTabbed.SearchAndReplaceAll( "($$)", "\t" );
        return sTabbed.GetToken( 0, '\t' );
    }

    if ( nFileType == GSI_FILE_L10NFRAMEWORK ) {
        static const sal_uInt16 aIdColumns[] = { 0, 1, 4, 5 };
        const sal_uInt16 nIdColumns = sizeof( aIdColumns ) / sizeof( aIdColumns[ 0 ] );

        ByteString sJoined;
        for ( sal_uInt16 nCol = 0; nCol < nIdColumns; ++nCol ) {
            if ( nCol )
                sJoined += "\t";
            sJoined += rLine.GetToken( aIdColumns[ nCol ], '\t' );
        }
        return sJoined;
    }

    return ByteString();
}
/*****************************************************************************/
// Extracts the language id field of one GSI line.
// Old style: third field (index 2) after "($$)" has been mapped to tabs.
// L10N framework: tab token 9.
// Any other file type yields an empty string.
ByteString GetGSILineLangId( const ByteString &rLine, sal_uInt16 nFileType )
/*****************************************************************************/
{
    if ( nFileType == GSI_FILE_L10NFRAMEWORK )
        return rLine.GetToken( 9, '\t' );

    if ( nFileType != GSI_FILE_OLDSTYLE )
        return ByteString();

    // old style: normalise the "($$)" separator to a tab before tokenising
    ByteString sFields( rLine );
    sFields.SearchAndReplaceAll( "($$)", "\t" );
    return sFields.GetToken( 2, '\t' );
}
/*****************************************************************************/
// Converts one GSI line in place between nEncoding and UTF-8.
//   bToUTF8   - true: nEncoding -> UTF-8, false: UTF-8 -> nEncoding
//   rLine     - the line to convert; receives the converted text
//   nEncoding - the non-UTF-8 side of the conversion
//   nFileType - one of the GSI_FILE_* codes
// Old style lines are converted as a whole. For L10N framework lines only
// the tab tokens 10 to 13 (zero based) are converted; all other tokens are
// copied verbatim. Lines of any other file type are left untouched.
void ConvertGSILine( sal_Bool bToUTF8, ByteString &rLine,
    rtl_TextEncoding nEncoding, sal_uInt16 nFileType )
/*****************************************************************************/
{
    switch ( nFileType ) {
        case GSI_FILE_OLDSTYLE:
            if ( bToUTF8 )
                rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding );
            else
                rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding );
        break;
        case GSI_FILE_L10NFRAMEWORK: {
            // rLine is not modified inside the loop, so count its tokens
            // once instead of rescanning the line on every iteration
            const sal_uInt16 nTokenCount = rLine.GetTokenCount( '\t' );
            ByteString sConverted;
            for ( sal_uInt16 i = 0; i < nTokenCount; i++ ) {
                ByteString sToken = rLine.GetToken( i, '\t' );
                if (( i > 9 ) && ( i < 14 )) {
                    if( bToUTF8 )
                        sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding );
                    else
                        sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding );
                }
                // re-insert the separator that tokenising removed
                if ( i )
                    sConverted += "\t";
                sConverted += sToken;
            }
            rLine = sConverted;
        }
        break;
    }
}
/*****************************************************************************/
// Prints the usage text of gsiconv to stdout.
// The charset list includes MS_1257 and UTF8, which the command line
// parser in main() accepts as well.
void Help()
/*****************************************************************************/
{
    // None of these lines contains a '%', so they are written verbatim.
    static const char * const aHelpLines[] = {
        "\n",
        "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n",
        "===============================================\n",
        "\n",
        "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n",
        "\n",
        "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n",
        "Switches: -t => conversion from charset to UTF-8\n",
        " -f => conversion from UTF-8 to charset\n",
        " -p n => creates several files with ca. n lines\n",
        "\n",
        "Allowed charsets:\n",
        " MS_932 => Japanese\n",
        " MS_936 => Chinese Simplified\n",
        " MS_949 => Korean\n",
        " MS_950 => Chinese Traditional\n",
        " MS_1250 => East Europe\n",
        " MS_1251 => Cyrillic\n",
        " MS_1252 => West Europe\n",
        " MS_1253 => Greek\n",
        " MS_1254 => Turkish\n",
        " MS_1255 => Hebrew\n",
        " MS_1256 => Arabic\n",
        " MS_1257 => Baltic\n",
        " UTF8 => Unicode (UTF-8)\n",
        "\n",
        "Allowed langids:\n",
        " 1 => ENGLISH_US\n",
        " 3 => PORTUGUESE \n",
        " 4 => GERMAN_DE (new german style)\n",
        " 7 => RUSSIAN\n",
        " 30 => GREEK\n",
        " 31 => DUTCH\n",
        " 33 => FRENCH\n",
        " 34 => SPANISH\n",
        " 35 => FINNISH\n",
        " 36 => HUNGARIAN\n",
        " 39 => ITALIAN\n",
        " 42 => CZECH\n",
        " 44 => ENGLISH (UK)\n",
        " 45 => DANISH\n",
        " 46 => SWEDISH\n",
        " 47 => NORWEGIAN\n",
        " 49 => GERMAN (old german style)\n",
        " 55 => PORTUGUESE_BRAZILIAN\n",
        " 81 => JAPANESE\n",
        " 82 => KOREAN\n",
        " 86 => CHINESE_SIMPLIFIED\n",
        " 88 => CHINESE_TRADITIONAL\n",
        " 90 => TURKISH\n",
        " 96 => ARABIC\n",
        " 97 => HEBREW\n",
        "\n"
    };
    const unsigned nHelpLines = sizeof( aHelpLines ) / sizeof( aHelpLines[ 0 ] );

    for ( unsigned nLine = 0; nLine < nHelpLines; ++nLine )
        fputs( aHelpLines[ nLine ], stdout );
}
/*****************************************************************************/
#if defined(UNX) || defined(OS2)
int main( int argc, char *argv[] )
#else
int _cdecl main( int argc, char *argv[] )
#endif
/*****************************************************************************/
{
if (( argc != 5 ) && ( argc != 4 )) {
Help();
exit ( 0 );
}
if ( argc == 4 ) {
if ( ByteString( argv[ 1 ] ) == "-p" ) {
DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ));
if ( !aSource.Exists()) {
fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
exit ( 2 );
}
DirEntry aOutput( aSource );
String sBase = aOutput.GetBase();
String sExt = aOutput.GetExtension();
String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US );
SvFileStream aGSI( sGSI, STREAM_STD_READ );
if ( !aGSI.IsOpen()) {
fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
exit ( 3 );
}
sal_uInt16 nFileType( GetGSIFileType( aGSI ));
sal_uLong nMaxLines = (sal_uLong) ByteString( argv[ 2 ] ).ToInt64();
if ( !nMaxLines ) {
fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" );
exit ( 3 );
}
ByteString sGSILine;
ByteString sOldId;
sal_uLong nLine = 0;
sal_uLong nOutputFile = 1;
String sOutput( sBase );
sOutput += String( "_", RTL_TEXTENCODING_ASCII_US );
sOutput += String::CreateFromInt64( nOutputFile );
if ( sExt.Len()) {
sOutput += String( ".", RTL_TEXTENCODING_ASCII_US );
sOutput += sExt;
}
nOutputFile ++;
aOutput.SetName( sOutput );
SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
while ( !aGSI.IsEof()) {
aGSI.ReadLine( sGSILine );
ByteString sId( GetGSILineId( sGSILine, nFileType ));
nLine++;
if (( nLine >= nMaxLines ) && ( sId != sOldId )) {
aOutputStream.Close();
ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding());
sText += " with ";
sText += ByteString::CreateFromInt64( nLine );
sText += " lines written.";
fprintf( stdout, "%s\n", sText.GetBuffer());
String sOutput1( sBase );
sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US );
sOutput1 += String::CreateFromInt64( nOutputFile );
if ( sExt.Len()) {
sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US );
sOutput1 += sExt;
}
nOutputFile ++;
aOutput.SetName( sOutput1 );
aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
nLine = 0;
}
aOutputStream.WriteLine( sGSILine );
sOldId = sId;
}
aGSI.Close();
aOutputStream.Close();
ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US );
sText += " with ";
sText += ByteString::CreateFromInt64( nLine );
sText += " lines written.";
}
else {
Help();
exit( 1 );
}
}
else {
if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) {
rtl_TextEncoding nEncoding;
ByteString sCurLangId( argv[ 2 ] );
ByteString sCharset( argv[ 3 ] );
sCharset.ToUpperAscii();
if ( sCharset == "MS_932" ) nEncoding = RTL_TEXTENCODING_MS_932;
else if ( sCharset == "MS_936" ) nEncoding = RTL_TEXTENCODING_MS_936;
else if ( sCharset == "MS_949" ) nEncoding = RTL_TEXTENCODING_MS_949;
else if ( sCharset == "MS_950" ) nEncoding = RTL_TEXTENCODING_MS_950;
else if ( sCharset == "MS_1250" ) nEncoding = RTL_TEXTENCODING_MS_1250;
else if ( sCharset == "MS_1251" ) nEncoding = RTL_TEXTENCODING_MS_1251;
else if ( sCharset == "MS_1252" ) nEncoding = RTL_TEXTENCODING_MS_1252;
else if ( sCharset == "MS_1253" ) nEncoding = RTL_TEXTENCODING_MS_1253;
else if ( sCharset == "MS_1254" ) nEncoding = RTL_TEXTENCODING_MS_1254;
else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255;
else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256;
else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257;
else if ( sCharset == "UTF8" ) nEncoding = RTL_TEXTENCODING_UTF8;
else {
Help();
exit ( 1 );
}
DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ));
if ( !aSource.Exists()) {
fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
exit ( 2 );
}
String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US );
SvFileStream aGSI( sGSI, STREAM_STD_READ );
if ( !aGSI.IsOpen()) {
fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
exit ( 3 );
}
sal_uInt16 nFileType( GetGSIFileType( aGSI ));
ByteString sGSILine;
while ( !aGSI.IsEof()) {
aGSI.ReadLine( sGSILine );
ByteString sLangId( GetGSILineLangId( sGSILine, nFileType ));
if ( sLangId == sCurLangId )
ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType );
fprintf( stdout, "%s\n", sGSILine.GetBuffer());
}
aGSI.Close();
}
else {
Help();
exit( 1 );
}
}
return 0;
}