blob: 27ab6882eee0d458ef8b7377f89c410898db083a [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_i18npool.hxx"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sal/main.h>
#include <sal/types.h>
#include <rtl/strbuf.hxx>
#include <rtl/ustring.hxx>
using namespace ::rtl;
/* Main Procedure */
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
{
FILE *sfp, *cfp;
if (argc < 3) exit(-1);
sfp = fopen(argv[1], "rb"); // open the source file for read;
if (sfp == NULL)
{
printf("Open the dictionary source file failed.");
return -1;
}
// create the C source file to write
cfp = fopen(argv[2], "wb");
if (cfp == NULL) {
fclose(sfp);
printf("Can't create the C source file.");
return -1;
}
fprintf(cfp, "/*\n");
fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
fprintf(cfp, " * All Rights Reserved.\n");
fprintf(cfp, " */\n\n");
fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
fprintf(cfp, "#include <sal/types.h>\n\n");
fprintf(cfp, "extern \"C\" {\n");
sal_Int32 count, i, j;
sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
sal_Bool exist[0x10000];
for (i = 0; i < 0x10000; i++) {
exist[i] = sal_False;
charArray[i] = 0;
}
// generate main dict. data array
fprintf(cfp, "static const sal_Unicode dataArea[] = {");
sal_Char str[1024];
sal_Unicode current = 0;
count = 0;
while (fgets(str, 1024, sfp)) {
// input file is in UTF-8 encoding
// don't convert last new line character to Ostr.
OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
const sal_Unicode *u = Ostr.getStr();
sal_Int32 len = Ostr.getLength();
i=0;
Ostr.iterateCodePoints(&i, 1);
if (len == i) continue; // skip one character word
if (*u != current) {
if (*u < current)
printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
current = *u;
charArray[current] = lenArrayCount;
}
if (lenArrayLen <= lenArrayCount+1)
lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
lenArray[lenArrayCount++] = lenArrayCurr;
exist[u[0]] = sal_True;
for (i = 1; i < len; i++) { // start from second character,
exist[u[i]] = sal_True; // since the first character is captured in charArray.
lenArrayCurr++;
if ((count++) % 0x10 == 0)
fprintf(cfp, "\n\t");
fprintf(cfp, "0x%04x, ", u[i]);
}
}
lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
charArray[current+1] = lenArrayCount;
fprintf(cfp, "\n};\n");
// generate lenArray
fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
count = 1;
fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
for (i = 0; i < lenArrayCount; i++) {
fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
if (count == 0xf) {
count = 0;
fprintf(cfp, "\n\t");
} else count++;
}
fprintf(cfp, "\n};\n");
free(lenArray);
// generate index1 array
fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
sal_Int16 set[0x100];
count = 0;
for (i = 0; i < 0x100; i++) {
for (j = 0; j < 0x100; j++)
if (charArray[(i*0x100) + j] != 0)
break;
fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
if ((i+1) % 0x10 == 0)
fprintf (cfp, "\n\t");
}
fprintf (cfp, "};\n");
// generate index2 array
fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
sal_Int32 prev = 0;
for (i = 0; i < 0x100; i++) {
if (set[i] != 0xff) {
for (j = 0; j < 0x100; j++) {
sal_Int32 k = (i*0x100) + j;
if (prev != 0 && charArray[k] == 0) {
for (k++; k < 0x10000; k++)
if (charArray[k] != 0)
break;
}
prev = charArray[(i*0x100) + j];
fprintf(
cfp, "0x%lx, ",
sal::static_int_cast< unsigned long >(
k < 0x10000 ? charArray[k] + 1 : 0));
if ((j+1) % 0x10 == 0)
fprintf (cfp, "\n\t");
}
fprintf (cfp, "\n\t");
}
}
fprintf (cfp, "\n};\n");
// generate existMark array
count = 0;
fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
for (i = 0; i < 0x1FFF; i++) {
sal_uInt8 bit = 0;
for (j = 0; j < 8; j++)
if (exist[i * 8 + j])
bit |= 1 << j;
fprintf(cfp, "0x%02x, ", bit);
if (count == 0xf) {
count = 0;
fprintf(cfp, "\n\t");
} else count++;
}
fprintf (cfp, "\n};\n");
// create function to return arrays
fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
fprintf (cfp, "}\n");
fclose(sfp);
fclose(cfp);
return 0;
} // End of main