blob: dcf8fbb89a8fd2155e0285f74f60c45d3f3b60e1 [file] [log] [blame]
/* $Id$
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version
* 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* etch_encoding.c -- character encoding
*/
#include <apr_iconv.h>
#include "etch_encoding.h"
#include "etch_log.h"
#include "etch_mem.h"
#include <wchar.h>
/* Category tag passed to ETCH_LOG for every message emitted from this file. */
static const char* LOG_CATEGORY = "etch_encoding";
/* Pool handed to apr_iconv_open()/apr_iconv_close() by this file; defined elsewhere. */
extern apr_pool_t* g_etch_main_pool;
/*
 * Mutex locked around the converter-table accesses in
 * etch_encoding_get_codepage() and around the close loop in
 * etch_encoding_shutdown(); defined elsewhere.
 */
extern apr_thread_mutex_t* g_etch_main_pool_mutex;
/* Side length of the square converter table (CODEPAGE_TABLE_SIZE x CODEPAGE_TABLE_SIZE slots). */
static const unsigned char CODEPAGE_TABLE_SIZE = 6;
/*
 * Cache of iconv converters, indexed as [(CODEPAGE_TABLE_SIZE * src) + dst].
 * Allocated and zeroed by etch_encoding_initialize(), released by
 * etch_encoding_shutdown(). A NULL slot means that converter has not been
 * opened yet; etch_encoding_get_codepage() opens it on first use.
 */
static apr_iconv_t* codepage_table = NULL;
etch_status_t etch_encoding_initialize()
{
size_t s = CODEPAGE_TABLE_SIZE * CODEPAGE_TABLE_SIZE * sizeof(apr_iconv_t);
codepage_table = etch_malloc(s,0);
memset(codepage_table, 0, s);
return ETCH_SUCCESS;
}
etch_status_t etch_encoding_shutdown()
{
int i = 0;
apr_thread_mutex_lock(g_etch_main_pool_mutex);
for(i = 0; i < CODEPAGE_TABLE_SIZE * CODEPAGE_TABLE_SIZE; i++) {
if(codepage_table[i] != NULL) {
apr_iconv_close(codepage_table[i], g_etch_main_pool);
}
}
apr_thread_mutex_unlock(g_etch_main_pool_mutex);
etch_free(codepage_table);
codepage_table = NULL;
return ETCH_SUCCESS;
}
/**
 * etch_encoding_get_encoding()
 * Maps an ETCH_ENCODING_* constant to the charset name that is handed to
 * apr_iconv_open().
 * @param encoding one of the ETCH_ENCODING_* constants.
 * @return the charset name, or NULL (after logging an error and asserting)
 * when the encoding has no mapping here.
 */
static char* etch_encoding_get_encoding(unsigned char encoding)
{
    char* charset_name = NULL;

    if (encoding == ETCH_ENCODING_ASCII) {
        charset_name = "iso-8859-1";
    } else if (encoding == ETCH_ENCODING_UTF8) {
        charset_name = "utf-8";
    } else if (encoding == ETCH_ENCODING_UCS2) {
        charset_name = "ucs2-internal";
    } else if (encoding == ETCH_ENCODING_UCS4) {
        charset_name = "ucs4-internal";
    } else if (encoding == ETCH_ENCODING_UTF16) {
        charset_name = "utf-16";
    } else {
        /* no mapping: report it and fall through with charset_name == NULL */
        ETCH_LOG(LOG_CATEGORY, ETCH_LOG_ERROR, "unsupported src-encoding %d\n", encoding);
        ETCH_ASSERT(!"encoding not supported");
    }

    return charset_name;
}
/**
 * etch_encoding_get_codepage()
 * Returns the cached iconv converter for the src -> dst encoding pair,
 * opening it on first use. The cache is accessed under
 * g_etch_main_pool_mutex.
 * @param src source encoding (ETCH_ENCODING_*).
 * @param dst destination encoding (ETCH_ENCODING_*).
 * @param codepage out parameter receiving the converter; set to NULL on
 * every failure path.
 * @return ETCH_SUCCESS on success; ETCH_EINVAL if codepage is NULL or an
 * encoding is unsupported or outside the cache; ETCH_ERROR if the cache is
 * not allocated or the converter could not be opened.
 */
etch_status_t etch_encoding_get_codepage(unsigned char src, unsigned char dst, apr_iconv_t* codepage)
{
    apr_iconv_t* slot = NULL;
    apr_status_t status = APR_SUCCESS;
    char* apr_codepage_src = NULL;
    char* apr_codepage_dst = NULL;

    if (codepage == NULL) {
        return ETCH_EINVAL;
    }
    *codepage = NULL;

    apr_codepage_src = etch_encoding_get_encoding(src);
    apr_codepage_dst = etch_encoding_get_encoding(dst);
    if (apr_codepage_src == NULL || apr_codepage_dst == NULL) {
        return ETCH_EINVAL;
    }

    /* the cache is a CODEPAGE_TABLE_SIZE x CODEPAGE_TABLE_SIZE grid; never index past it */
    if (src >= CODEPAGE_TABLE_SIZE || dst >= CODEPAGE_TABLE_SIZE) {
        return ETCH_EINVAL;
    }

    apr_thread_mutex_lock(g_etch_main_pool_mutex);
    if (codepage_table == NULL) {
        /* cache not allocated (initialize not called, or already shut down) */
        apr_thread_mutex_unlock(g_etch_main_pool_mutex);
        return ETCH_ERROR;
    }

    slot = &codepage_table[(CODEPAGE_TABLE_SIZE * src) + dst];
    if (*slot == NULL) {
        status = apr_iconv_open(apr_codepage_dst, apr_codepage_src, g_etch_main_pool, slot);
        if (status != APR_SUCCESS) {
            /*
             * Discard whatever a failed open left in the slot, so the next
             * call retries instead of treating it as a cached converter and
             * shutdown does not try to close it.
             */
            *slot = NULL;
        }
    }
    *codepage = *slot;
    apr_thread_mutex_unlock(g_etch_main_pool_mutex);

    if (status != APR_SUCCESS) {
        return ETCH_ERROR;
    }
    return ETCH_SUCCESS;
}
/**
 * etch_encoding_transcode()
 * Converts inByteCount bytes at `in` from inEncoding to outEncoding into a
 * freshly allocated buffer.
 *
 * The buffer is obtained from etch_malloc and must be released by the caller
 * with etch_free. It is sized for the worst case (4 output bytes per input
 * byte, ascii->ucs4) plus 4 spare zero bytes, so the converted data is always
 * followed by a zero terminator of any supported width.
 *
 * @param out receives the new buffer on success; set to NULL on failure.
 * @param outEncoding destination encoding (ETCH_ENCODING_*).
 * @param in source bytes; may be NULL only when inByteCount is 0.
 * @param inEncoding source encoding (ETCH_ENCODING_*).
 * @param inByteCount number of source bytes, excluding any terminator.
 * @param resultingByteCount optional; receives the number of bytes written
 * to *out, excluding the terminator.
 * @param pool currently not used by this function.
 * @return 0 on success, -1 on invalid arguments, allocation failure or
 * conversion failure.
 */
int etch_encoding_transcode(char** out, unsigned char outEncoding, const char* in, unsigned char inEncoding, unsigned int inByteCount, int* resultingByteCount, etch_pool_t* pool)
{
    /* spare bytes behind the converted data: the widest terminator (UCS4) */
    enum { TERMINATOR_RESERVE = 4 };

    /* in */
    const char* apr_in_buf = in;
    apr_size_t apr_in_bytecount = inByteCount;
    /* out */
    char* buffer = NULL;            /* start of the allocation, never advanced */
    char* apr_out_buf = NULL;       /* write cursor, advanced by apr_iconv */
    apr_size_t capacity = 0;        /* bytes available to the converter */
    apr_size_t apr_out_bytecount = 0;
    /* translated */
    apr_size_t apr_translated = 0;
    /* converter */
    apr_iconv_t apr_cd = NULL;
    apr_status_t apr_status = APR_SUCCESS;
    etch_status_t etch_status;

    (void)pool;

    /* check input params */
    if (NULL == out) {
        return -1;
    }
    *out = NULL;
    if (NULL == in && inByteCount > 0) {
        return -1;
    }

    /* refuse sizes whose worst-case buffer would overflow apr_size_t */
    if (apr_in_bytecount > (((apr_size_t)-1) - TERMINATOR_RESERVE) / 4) {
        return -1;
    }
    capacity = apr_in_bytecount * 4; /* worst case ascii->ucs4 */

    buffer = etch_malloc(capacity + TERMINATOR_RESERVE, ETCHTYPEB_BYTES);
    if (NULL == buffer) {
        return -1;
    }
    memset(buffer, 0, capacity + TERMINATOR_RESERVE);

    if (inEncoding == outEncoding) {
        /* nothing to convert: plain copy, and report its size */
        if (inByteCount > 0) {
            memcpy(buffer, in, inByteCount);
        }
        if (NULL != resultingByteCount) {
            *resultingByteCount = (int)inByteCount;
        }
        *out = buffer;
        return 0;
    }

    /* open iconv */
    etch_status = etch_encoding_get_codepage(inEncoding, outEncoding, &apr_cd);
    if (etch_status != ETCH_SUCCESS) {
        ETCH_LOG(LOG_CATEGORY, ETCH_LOG_ERROR, "encoding conversion error %d\n", (int)etch_status);
        /* clean up */
        etch_free(buffer);
        return -1;
    }

    /* convert; the converter only ever sees `capacity` bytes, so the reserve stays zero */
    apr_out_buf = buffer;
    apr_out_bytecount = capacity;
    apr_status = apr_iconv(apr_cd, &apr_in_buf, &apr_in_bytecount, &apr_out_buf, &apr_out_bytecount, &apr_translated);
    if (apr_status != APR_SUCCESS) {
        char errbuf[512];
        apr_strerror(apr_status, errbuf, sizeof(errbuf));
        ETCH_LOG(LOG_CATEGORY, ETCH_LOG_ERROR, "encoding conversion error-code: %d error-msg: %s\n", apr_status, errbuf);
        /* clean up */
        etch_free(buffer);
        return -1;
    }

    if (NULL != resultingByteCount) {
        *resultingByteCount = (int)(capacity - apr_out_bytecount);
    }
    *out = buffer;
    return 0;
}
/**
 * etch_encoding_for_wchar()
 * Picks the ETCH_ENCODING_* constant that matches the size of wchar_t on
 * this platform.
 * @return ETCH_ENCODING_UCS2 for a 2-byte wchar_t, ETCH_ENCODING_UCS4 for a
 * 4-byte wchar_t; any other size asserts and returns 0.
 */
unsigned char etch_encoding_for_wchar()
{
    const size_t wchar_width = sizeof(wchar_t);

    if (wchar_width == 2) {
        return ETCH_ENCODING_UCS2;
    }
    if (wchar_width == 4) {
        return ETCH_ENCODING_UCS4;
    }

    ETCH_ASSERT(!"unknown wchar_t size");
    return 0;
}
/**
 * etch_encoding_transcode_wchar()
 * Converts a zero-terminated wchar_t string to outEncoding by delegating to
 * etch_encoding_transcode(). The source encoding is chosen from
 * sizeof(wchar_t); the terminator itself is not passed to the converter.
 * The resulting byte count is not reported to the caller.
 * @param out receives the converted buffer (see etch_encoding_transcode).
 * @param outEncoding destination encoding (ETCH_ENCODING_*).
 * @param in zero-terminated source string.
 * @param pool forwarded to etch_encoding_transcode.
 * @return result of etch_encoding_transcode, or -1 if `in` is NULL or its
 * byte length does not fit in an unsigned int.
 */
int etch_encoding_transcode_wchar(char** out, unsigned char outEncoding, const wchar_t* in, etch_pool_t* pool)
{
    int outByteCount = 0;
    size_t charcount;
    unsigned char srcEncoding;

    /* wcslen on NULL would crash */
    if (in == NULL) {
        return -1;
    }

    charcount = wcslen(in);
    /* the byte count is passed on as unsigned int; reject lengths that would be truncated */
    if (charcount > ((unsigned int)-1) / sizeof(wchar_t)) {
        return -1;
    }

    srcEncoding = etch_encoding_for_wchar();
    return etch_encoding_transcode(out, outEncoding, (const char*)in, srcEncoding, (unsigned int)(charcount * sizeof(wchar_t)), &outByteCount, pool);
}
/**
 * etch_encoding_transcode_to_wchar()
 * Converts inByteCount bytes at `in` from inEncoding into the wchar_t
 * encoding of this platform by delegating to etch_encoding_transcode().
 * The resulting byte count is not reported to the caller.
 * @param out receives the converted buffer.
 * @param in source bytes.
 * @param inEncoding source encoding (ETCH_ENCODING_*).
 * @param inByteCount number of source bytes.
 * @param pool forwarded to etch_encoding_transcode.
 * @return whatever etch_encoding_transcode returns.
 */
int etch_encoding_transcode_to_wchar(wchar_t** out, const void* in, unsigned char inEncoding, unsigned int inByteCount, etch_pool_t* pool)
{
    int convertedBytes;
    char** rawOut = (char**)out;
    const unsigned char wcharEncoding = etch_encoding_for_wchar();

    return etch_encoding_transcode(rawOut, wcharEncoding, in, inEncoding, inByteCount, &convertedBytes, pool);
}
/**
 * etch_encoding_get_sizeof_terminator()
 * Reports how many zero bytes terminate a string in the given encoding.
 * @param encoding one of the ETCH_ENCODING_* constants.
 * @return 1 for ASCII and UTF8, 2 for UCS2, 4 for UCS4; every other value
 * (including UTF16 and UTF32) asserts and returns 0.
 */
unsigned int etch_encoding_get_sizeof_terminator(unsigned char encoding)
{
    if (encoding == ETCH_ENCODING_ASCII || encoding == ETCH_ENCODING_UTF8) {
        return 1;
    }
    if (encoding == ETCH_ENCODING_UCS2) {
        return 2;
    }
    if (encoding == ETCH_ENCODING_UCS4) {
        return 4;
    }

    /* UTF16, UTF32 and unknown values all end up here */
    ETCH_ASSERT(!"encoding not supported");
    return 0;
}